Print this page
    
OS-5015 PT_INTERP headers should be permitted after PT_LOAD headers
OS-5451 comm page should not break i86xpv
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-5192 need faster clock_gettime
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
Reviewed by: Ryan Zezeski <ryan@zinascii.com>
OS-5293 lx brand: prelink(8)'d binaries core dump before main()
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-5072 lxbrand support PT_GNU_STACK
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-5202 Support AT_SECURE & AT_*ID in LX
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4824 Unlike Linux, nested interpreters don't work
(LX changes only, the rest were upstreamed...)
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Cody Mello <cody.mello@joyent.com>
OS-3735 modstubs MAXNARG is too low.
OS-3733 Verify b_native_exec exists before calling it
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4119 lxbrand panic when running native perl inside lx zone
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4128 programs that lack PT_PHDR are not properly loaded
OS-4141 freeing phdrs induces bad kmem_free() in elfexec()
backout OS-4141: needs more work
backout OS-4128: needs more work
OS-4141 freeing phdrs induces bad kmem_free() in elfexec()
OS-4128 programs that lack PT_PHDR are not properly loaded
OS-3696 lx brand: G-Portugol programs core dump
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3517 lx brand: branded zones don't interpret .interp section
OS-3405 lx brand: socket() fails for PF_INET6
OS-3382 lxbrand 64bit gettimeofday depends on vsyscall or vdso
OS-3280 need a way to specify the root of a native system in the lx brand
OS-3279 lx brand should allow delegated datasets
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-2949 add support for AT_RANDOM aux vector entry
OS-2877 lx_librtld_db fails to load due to NULL DT_DEBUG
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/exec/elf/elf.c
          +++ new/usr/src/uts/common/exec/elf/elf.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  
    | 
      ↓ open down ↓ | 
    18 lines elided | 
    
      ↑ open up ↑ | 
  
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  27   27  /*        All Rights Reserved   */
  28   28  /*
  29      - * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
       29 + * Copyright 2016 Joyent, Inc.
  30   30   */
  31   31  
  32   32  #include <sys/types.h>
  33   33  #include <sys/param.h>
  34   34  #include <sys/thread.h>
  35   35  #include <sys/sysmacros.h>
  36   36  #include <sys/signal.h>
  37   37  #include <sys/cred.h>
  38   38  #include <sys/user.h>
  39   39  #include <sys/errno.h>
  40   40  #include <sys/vnode.h>
  41   41  #include <sys/mman.h>
  42   42  #include <sys/kmem.h>
  43   43  #include <sys/proc.h>
  44   44  #include <sys/pathname.h>
  45   45  #include <sys/cmn_err.h>
  46   46  #include <sys/systm.h>
  47   47  #include <sys/elf.h>
  48   48  #include <sys/vmsystm.h>
  49   49  #include <sys/debug.h>
  50   50  #include <sys/auxv.h>
  51   51  #include <sys/exec.h>
  52   52  #include <sys/prsystm.h>
  53   53  #include <vm/as.h>
  54   54  #include <vm/rm.h>
  55   55  #include <vm/seg.h>
  56   56  #include <vm/seg_vn.h>
  57   57  #include <sys/modctl.h>
  58   58  #include <sys/systeminfo.h>
  
    | 
      ↓ open down ↓ | 
    19 lines elided | 
    
      ↑ open up ↑ | 
  
  59   59  #include <sys/vmparam.h>
  60   60  #include <sys/machelf.h>
  61   61  #include <sys/shm_impl.h>
  62   62  #include <sys/archsystm.h>
  63   63  #include <sys/fasttrap.h>
  64   64  #include <sys/brand.h>
  65   65  #include "elf_impl.h"
  66   66  #include <sys/sdt.h>
  67   67  #include <sys/siginfo.h>
  68   68  
       69 +#if defined(__x86)
       70 +#include <sys/comm_page_util.h>
       71 +#endif /* defined(__x86) */
       72 +
       73 +
  69   74  extern int at_flags;
  70   75  
  71   76  #define ORIGIN_STR      "ORIGIN"
  72   77  #define ORIGIN_STR_SIZE 6
  73   78  
  74   79  static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
  75   80  static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
  76   81      ssize_t *);
  77   82  static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
  78   83      ssize_t *, caddr_t *, ssize_t *);
  79   84  static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
  80   85  static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
  81   86      Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
  82   87      caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
  83   88  
  84   89  typedef enum {
  85   90          STR_CTF,
  86   91          STR_SYMTAB,
  87   92          STR_DYNSYM,
  88   93          STR_STRTAB,
  89   94          STR_DYNSTR,
  90   95          STR_SHSTRTAB,
  91   96          STR_NUM
  92   97  } shstrtype_t;
  93   98  
  94   99  static const char *shstrtab_data[] = {
  95  100          ".SUNW_ctf",
  96  101          ".symtab",
  97  102          ".dynsym",
  98  103          ".strtab",
  99  104          ".dynstr",
 100  105          ".shstrtab"
 101  106  };
 102  107  
 103  108  typedef struct shstrtab {
 104  109          int     sst_ndx[STR_NUM];
 105  110          int     sst_cur;
 106  111  } shstrtab_t;
 107  112  
 108  113  static void
 109  114  shstrtab_init(shstrtab_t *s)
 110  115  {
 111  116          bzero(&s->sst_ndx, sizeof (s->sst_ndx));
 112  117          s->sst_cur = 1;
 113  118  }
 114  119  
 115  120  static int
 116  121  shstrtab_ndx(shstrtab_t *s, shstrtype_t type)
 117  122  {
 118  123          int ret;
 119  124  
 120  125          if ((ret = s->sst_ndx[type]) != 0)
 121  126                  return (ret);
 122  127  
 123  128          ret = s->sst_ndx[type] = s->sst_cur;
 124  129          s->sst_cur += strlen(shstrtab_data[type]) + 1;
 125  130  
 126  131          return (ret);
 127  132  }
 128  133  
 129  134  static size_t
 130  135  shstrtab_size(const shstrtab_t *s)
 131  136  {
 132  137          return (s->sst_cur);
 133  138  }
 134  139  
 135  140  static void
 136  141  shstrtab_dump(const shstrtab_t *s, char *buf)
 137  142  {
 138  143          int i, ndx;
 139  144  
 140  145          *buf = '\0';
 141  146          for (i = 0; i < STR_NUM; i++) {
 142  147                  if ((ndx = s->sst_ndx[i]) != 0)
 143  148                          (void) strcpy(buf + ndx, shstrtab_data[i]);
 144  149          }
 145  150  }
 146  151  
 147  152  static int
 148  153  dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
 149  154  {
 150  155          ASSERT(phdrp->p_type == PT_SUNWDTRACE);
 151  156  
 152  157          /*
 153  158           * See the comment in fasttrap.h for information on how to safely
 154  159           * update this program header.
 155  160           */
  
    | 
      ↓ open down ↓ | 
    77 lines elided | 
    
      ↑ open up ↑ | 
  
 156  161          if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
 157  162              (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
 158  163                  return (-1);
 159  164  
 160  165          args->thrptr = phdrp->p_vaddr + base;
 161  166  
 162  167          return (0);
 163  168  }
 164  169  
 165  170  /*
 166      - * Map in the executable pointed to by vp. Returns 0 on success.
      171 + * Map in the executable pointed to by vp. Returns 0 on success.  Note that
      172 + * this function currently has the maximum number of arguments allowed by
      173 + * modstubs on x86 (MAXNARG)!  Do _not_ add to this function signature without
      174 + * adding to MAXNARG.  (Better yet, do not add to this monster of a function
      175 + * signature!)
 167  176   */
 168  177  int
 169  178  mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
 170      -    intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
 171      -    caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
      179 +    intptr_t *voffset, caddr_t exec_file, char **interpp, caddr_t *bssbase,
      180 +    caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap, uintptr_t *minaddrp)
 172  181  {
 173  182          size_t          len;
 174  183          struct vattr    vat;
 175  184          caddr_t         phdrbase = NULL;
 176  185          ssize_t         phdrsize;
 177  186          int             nshdrs, shstrndx, nphdrs;
 178  187          int             error = 0;
 179  188          Phdr            *uphdr = NULL;
 180  189          Phdr            *junk = NULL;
 181  190          Phdr            *dynphdr = NULL;
 182  191          Phdr            *dtrphdr = NULL;
      192 +        char            *interp = NULL;
 183  193          uintptr_t       lddata;
 184  194          long            execsz;
 185  195          intptr_t        minaddr;
 186  196  
 187  197          if (lddatap != NULL)
 188  198                  *lddatap = NULL;
 189  199  
      200 +        if (minaddrp != NULL)
      201 +                *minaddrp = NULL;
      202 +
 190  203          if (error = execpermissions(vp, &vat, args)) {
 191  204                  uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
 192  205                  return (error);
 193  206          }
 194  207  
 195  208          if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
 196  209              &nphdrs)) != 0 ||
 197  210              (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
 198  211              &phdrsize)) != 0) {
 199  212                  uprintf("%s: Cannot read %s\n", exec_file, args->pathname);
 200  213                  return (error);
 201  214          }
 202  215  
 203  216          if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
 204  217                  uprintf("%s: Nothing to load in %s", exec_file, args->pathname);
  
    | 
      ↓ open down ↓ | 
    5 lines elided | 
    
      ↑ open up ↑ | 
  
 205  218                  kmem_free(phdrbase, phdrsize);
 206  219                  return (ENOEXEC);
 207  220          }
 208  221          if (lddatap != NULL)
 209  222                  *lddatap = lddata;
 210  223  
 211  224          if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
 212  225              &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
 213  226              len, &execsz, brksize)) {
 214  227                  uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
      228 +                if (uphdr != NULL && uphdr->p_flags == 0)
      229 +                        kmem_free(uphdr, sizeof (Phdr));
 215  230                  kmem_free(phdrbase, phdrsize);
 216  231                  return (error);
 217  232          }
 218  233  
      234 +        if (minaddrp != NULL)
      235 +                *minaddrp = minaddr;
      236 +
 219  237          /*
 220      -         * Inform our caller if the executable needs an interpreter.
      238 +         * If the executable requires an interpreter, determine its name.
 221  239           */
 222      -        *interp = (dynphdr == NULL) ? 0 : 1;
      240 +        if (dynphdr != NULL) {
      241 +                ssize_t resid;
 223  242  
      243 +                if (dynphdr->p_filesz > MAXPATHLEN || dynphdr->p_filesz == 0) {
      244 +                        uprintf("%s: Invalid interpreter\n", exec_file);
      245 +                        kmem_free(phdrbase, phdrsize);
      246 +                        return (ENOEXEC);
      247 +                }
      248 +
      249 +                interp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
      250 +
      251 +                if ((error = vn_rdwr(UIO_READ, vp, interp, dynphdr->p_filesz,
      252 +                    (offset_t)dynphdr->p_offset, UIO_SYSSPACE, 0,
      253 +                    (rlim64_t)0, CRED(), &resid)) != 0 || resid != 0 ||
      254 +                    interp[dynphdr->p_filesz - 1] != '\0') {
      255 +                        uprintf("%s: Cannot obtain interpreter pathname\n",
      256 +                            exec_file);
      257 +                        kmem_free(interp, MAXPATHLEN);
      258 +                        kmem_free(phdrbase, phdrsize);
      259 +                        return (error != 0 ? error : ENOEXEC);
      260 +                }
      261 +        }
      262 +
 224  263          /*
 225  264           * If this is a statically linked executable, voffset should indicate
 226  265           * the address of the executable itself (it normally holds the address
 227  266           * of the interpreter).
 228  267           */
 229      -        if (ehdr->e_type == ET_EXEC && *interp == 0)
      268 +        if (ehdr->e_type == ET_EXEC && interp == NULL)
 230  269                  *voffset = minaddr;
 231  270  
      271 +        /*
      272 +         * If the caller has asked for the interpreter name, return it (it's
      273 +         * up to the caller to free it); if the caller hasn't asked for it,
      274 +         * free it ourselves.
      275 +         */
      276 +        if (interpp != NULL) {
      277 +                *interpp = interp;
      278 +        } else if (interp != NULL) {
      279 +                kmem_free(interp, MAXPATHLEN);
      280 +        }
      281 +
 232  282          if (uphdr != NULL) {
 233  283                  *uphdr_vaddr = uphdr->p_vaddr;
      284 +
      285 +                if (uphdr->p_flags == 0)
      286 +                        kmem_free(uphdr, sizeof (Phdr));
      287 +        } else if (ehdr->e_type == ET_DYN) {
      288 +                /*
      289 +                 * If we don't have a uphdr, we'll apply the logic found
      290 +                 * in mapelfexec(): treat the p_vaddr of the first PT_LOAD
      291 +                 * segment as the object's base and add e_phoff to it.
      292 +                 */
      293 +                Phdr *phdr = (Phdr *)phdrbase;
      294 +                int i, hsize = ehdr->e_phentsize;
      295 +
      296 +                for (i = nphdrs; i > 0; i--) {
      297 +                        if (phdr->p_type == PT_LOAD) {
      298 +                                *uphdr_vaddr = (uintptr_t)phdr->p_vaddr +
      299 +                                    ehdr->e_phoff;
      300 +                                break;
      301 +                        }
      302 +
      303 +                        phdr = (Phdr *)((caddr_t)phdr + hsize);
      304 +                }
      305 +
      306 +                /*
      307 +                 * If we don't have a PT_LOAD segment, we should have returned
      308 +                 * ENOEXEC when elfsize() returned 0, above.
      309 +                 */
      310 +                VERIFY(i > 0);
 234  311          } else {
 235  312                  *uphdr_vaddr = (Addr)-1;
 236  313          }
 237  314  
 238  315          kmem_free(phdrbase, phdrsize);
 239  316          return (error);
 240  317  }
 241  318  
 242  319  /*ARGSUSED*/
 243  320  int
 244  321  elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 245  322      int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
 246      -    int brand_action)
      323 +    int *brand_action)
 247  324  {
 248  325          caddr_t         phdrbase = NULL;
 249  326          caddr_t         bssbase = 0;
 250  327          caddr_t         brkbase = 0;
 251  328          size_t          brksize = 0;
 252      -        ssize_t         dlnsize;
      329 +        ssize_t         dlnsize, nsize = 0;
 253  330          aux_entry_t     *aux;
 254  331          int             error;
 255  332          ssize_t         resid;
 256  333          int             fd = -1;
 257  334          intptr_t        voffset;
 258  335          Phdr            *dyphdr = NULL;
 259  336          Phdr            *stphdr = NULL;
 260  337          Phdr            *uphdr = NULL;
 261  338          Phdr            *junk = NULL;
 262  339          size_t          len;
 263  340          ssize_t         phdrsize;
 264  341          int             postfixsize = 0;
 265  342          int             i, hsize;
  
    | 
      ↓ open down ↓ | 
    3 lines elided | 
    
      ↑ open up ↑ | 
  
 266  343          Phdr            *phdrp;
 267  344          Phdr            *dataphdrp = NULL;
 268  345          Phdr            *dtrphdr;
 269  346          Phdr            *capphdr = NULL;
 270  347          Cap             *cap = NULL;
 271  348          ssize_t         capsize;
 272  349          int             hasu = 0;
 273  350          int             hasauxv = 0;
 274  351          int             hasdy = 0;
 275  352          int             branded = 0;
      353 +        int             dynuphdr = 0;
 276  354  
 277  355          struct proc *p = ttoproc(curthread);
 278  356          struct user *up = PTOU(p);
 279  357          struct bigwad {
 280  358                  Ehdr    ehdr;
 281  359                  aux_entry_t     elfargs[__KERN_NAUXV_IMPL];
 282  360                  char            dl_name[MAXPATHLEN];
 283  361                  char            pathbuf[MAXPATHLEN];
 284  362                  struct vattr    vattr;
 285  363                  struct execenv  exenv;
 286  364          } *bigwad;      /* kmem_alloc this behemoth so we don't blow stack */
 287  365          Ehdr            *ehdrp;
 288  366          int             nshdrs, shstrndx, nphdrs;
 289  367          char            *dlnp;
 290  368          char            *pathbufp;
 291  369          rlim64_t        limit;
 292  370          rlim64_t        roundlimit;
 293  371  
 294  372          ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
 295  373  
 296  374          bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
 297  375          ehdrp = &bigwad->ehdr;
 298  376          dlnp = bigwad->dl_name;
 299  377          pathbufp = bigwad->pathbuf;
 300  378  
 301  379          /*
 302  380           * Obtain ELF and program header information.
 303  381           */
 304  382          if ((error = getelfhead(vp, CRED(), ehdrp, &nshdrs, &shstrndx,
 305  383              &nphdrs)) != 0 ||
 306  384              (error = getelfphdr(vp, CRED(), ehdrp, nphdrs, &phdrbase,
 307  385              &phdrsize)) != 0)
 308  386                  goto out;
 309  387  
 310  388          /*
 311  389           * Prevent executing an ELF file that has no entry point.
 312  390           */
 313  391          if (ehdrp->e_entry == 0) {
 314  392                  uprintf("%s: Bad entry point\n", exec_file);
 315  393                  goto bad;
 316  394          }
 317  395  
 318  396          /*
 319  397           * Put data model that we're exec-ing to into the args passed to
  
    | 
      ↓ open down ↓ | 
    34 lines elided | 
    
      ↑ open up ↑ | 
  
 320  398           * exec_args(), so it will know what it is copying to on new stack.
 321  399           * Now that we know whether we are exec-ing a 32-bit or 64-bit
 322  400           * executable, we can set execsz with the appropriate NCARGS.
 323  401           */
 324  402  #ifdef  _LP64
 325  403          if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) {
 326  404                  args->to_model = DATAMODEL_ILP32;
 327  405                  *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
 328  406          } else {
 329  407                  args->to_model = DATAMODEL_LP64;
 330      -                args->stk_prot &= ~PROT_EXEC;
      408 +                if (!args->stk_prot_override) {
      409 +                        args->stk_prot &= ~PROT_EXEC;
      410 +                }
 331  411  #if defined(__i386) || defined(__amd64)
 332  412                  args->dat_prot &= ~PROT_EXEC;
 333  413  #endif
 334  414                  *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1);
 335  415          }
 336  416  #else   /* _LP64 */
 337  417          args->to_model = DATAMODEL_ILP32;
 338  418          *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1);
 339  419  #endif  /* _LP64 */
 340  420  
 341  421          /*
 342      -         * We delay invoking the brand callback until we've figured out
 343      -         * what kind of elf binary we're trying to run, 32-bit or 64-bit.
 344      -         * We do this because now the brand library can just check
 345      -         * args->to_model to see if the target is 32-bit or 64-bit without
 346      -         * having do duplicate all the code above.
      422 +         * We delay invoking the brand callback until we've figured out what
      423 +         * kind of elf binary we're trying to run, 32-bit or 64-bit.  We do this
      424 +         * because now the brand library can just check args->to_model to see if
      425 +         * the target is 32-bit or 64-bit without having to duplicate all the
      426 +         * code above.
 347  427           *
      428 +         * We also give the brand a chance to indicate that based on the ELF
      429 +         * OSABI of the target binary it should become unbranded and optionally
      430 +         * indicate that it should be treated as existing in a specific prefix.
      431 +         *
      432 +         * Note that if a brand opts to go down this route it does not actually
      433 +         * end up being debranded. In other words, future programs that exec
      434 +         * will still be considered for branding unless this escape hatch is
      435 +         * used. Consider the case of lx brand for example. If a user runs
      436 +         * /native/usr/sbin/dtrace -c /bin/ls, the isaexec and normal executable
      437 +         * of DTrace that's in /native will take this escape hatch and be run
      438 +         * and interpreted using the normal system call table; however, the
      439 +         * execution of a non-illumos binary in the form of /bin/ls will still
      440 +         * be branded and be subject to all of the normal actions of the brand.
      441 +         *
 348  442           * The level checks associated with brand handling below are used to
 349  443           * prevent a loop since the brand elfexec function typically comes back
 350  444           * through this function. We must check <= here since the nested
 351  445           * handling in the #! interpreter code will increment the level before
 352  446           * calling gexec to run the final elfexec interpreter.
 353  447           */
      448 +        if ((level <= INTP_MAXDEPTH) && (*brand_action != EBA_NATIVE) &&
      449 +            (PROC_IS_BRANDED(p)) && (BROP(p)->b_native_exec != NULL)) {
      450 +                if (BROP(p)->b_native_exec(ehdrp->e_ident[EI_OSABI],
      451 +                    &args->brand_nroot) == B_TRUE) {
      452 +                        ASSERT(ehdrp->e_ident[EI_OSABI]);
      453 +                        *brand_action = EBA_NATIVE;
      454 +                        /* Add one for the trailing '/' in the path */
      455 +                        if (args->brand_nroot != NULL)
      456 +                                nsize = strlen(args->brand_nroot) + 1;
      457 +                }
      458 +        }
      459 +
 354  460          if ((level <= INTP_MAXDEPTH) &&
 355      -            (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
      461 +            (*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
 356  462                  error = BROP(p)->b_elfexec(vp, uap, args,
 357  463                      idatap, level + 1, execsz, setid, exec_file, cred,
 358  464                      brand_action);
 359  465                  goto out;
 360  466          }
 361  467  
 362  468          /*
 363  469           * Determine aux size now so that stack can be built
 364  470           * in one shot (except actual copyout of aux image),
 365  471           * determine any non-default stack protections,
 366  472           * and still have this code be machine independent.
 367  473           */
 368  474          hsize = ehdrp->e_phentsize;
 369  475          phdrp = (Phdr *)phdrbase;
 370  476          for (i = nphdrs; i > 0; i--) {
 371  477                  switch (phdrp->p_type) {
 372  478                  case PT_INTERP:
 373  479                          hasauxv = hasdy = 1;
 374  480                          break;
 375  481                  case PT_PHDR:
 376  482                          hasu = 1;
 377  483                          break;
 378  484                  case PT_SUNWSTACK:
 379  485                          args->stk_prot = PROT_USER;
 380  486                          if (phdrp->p_flags & PF_R)
 381  487                                  args->stk_prot |= PROT_READ;
 382  488                          if (phdrp->p_flags & PF_W)
 383  489                                  args->stk_prot |= PROT_WRITE;
 384  490                          if (phdrp->p_flags & PF_X)
 385  491                                  args->stk_prot |= PROT_EXEC;
 386  492                          break;
 387  493                  case PT_LOAD:
 388  494                          dataphdrp = phdrp;
 389  495                          break;
 390  496                  case PT_SUNWCAP:
 391  497                          capphdr = phdrp;
 392  498                          break;
 393  499                  }
 394  500                  phdrp = (Phdr *)((caddr_t)phdrp + hsize);
 395  501          }
 396  502  
 397  503          if (ehdrp->e_type != ET_EXEC) {
 398  504                  dataphdrp = NULL;
 399  505                  hasauxv = 1;
 400  506          }
 401  507  
 402  508          /* Copy BSS permissions to args->dat_prot */
 403  509          if (dataphdrp != NULL) {
 404  510                  args->dat_prot = PROT_USER;
 405  511                  if (dataphdrp->p_flags & PF_R)
 406  512                          args->dat_prot |= PROT_READ;
 407  513                  if (dataphdrp->p_flags & PF_W)
 408  514                          args->dat_prot |= PROT_WRITE;
 409  515                  if (dataphdrp->p_flags & PF_X)
 410  516                          args->dat_prot |= PROT_EXEC;
 411  517          }
 412  518  
 413  519          /*
  414  520           * If an auxvector will be required - reserve the space for
 415  521           * it now.  This may be increased by exec_args if there are
  
    | 
      ↓ open down ↓ | 
    50 lines elided | 
    
      ↑ open up ↑ | 
  
 416  522           * ISA-specific types (included in __KERN_NAUXV_IMPL).
 417  523           */
 418  524          if (hasauxv) {
 419  525                  /*
  420  526                   * If an AUX vector is being built - the base AUX
 421  527                   * entries are:
 422  528                   *
 423  529                   *      AT_BASE
 424  530                   *      AT_FLAGS
 425  531                   *      AT_PAGESZ
      532 +                 *      AT_RANDOM
 426  533                   *      AT_SUN_AUXFLAGS
 427  534                   *      AT_SUN_HWCAP
 428  535                   *      AT_SUN_HWCAP2
 429  536                   *      AT_SUN_PLATFORM (added in stk_copyout)
 430  537                   *      AT_SUN_EXECNAME (added in stk_copyout)
 431  538                   *      AT_NULL
 432  539                   *
 433      -                 * total == 9
      540 +                 * total == 10
 434  541                   */
 435  542                  if (hasdy && hasu) {
 436  543                          /*
 437  544                           * Has PT_INTERP & PT_PHDR - the auxvectors that
 438  545                           * will be built are:
 439  546                           *
 440  547                           *      AT_PHDR
 441  548                           *      AT_PHENT
 442  549                           *      AT_PHNUM
 443  550                           *      AT_ENTRY
 444  551                           *      AT_LDDATA
 445  552                           *
 446  553                           * total = 5
 447  554                           */
 448      -                        args->auxsize = (9 + 5) * sizeof (aux_entry_t);
      555 +                        args->auxsize = (10 + 5) * sizeof (aux_entry_t);
 449  556                  } else if (hasdy) {
 450  557                          /*
 451  558                           * Has PT_INTERP but no PT_PHDR
 452  559                           *
 453  560                           *      AT_EXECFD
 454  561                           *      AT_LDDATA
 455  562                           *
 456  563                           * total = 2
 457  564                           */
 458      -                        args->auxsize = (9 + 2) * sizeof (aux_entry_t);
      565 +                        args->auxsize = (10 + 2) * sizeof (aux_entry_t);
 459  566                  } else {
 460      -                        args->auxsize = 9 * sizeof (aux_entry_t);
      567 +                        args->auxsize = 10 * sizeof (aux_entry_t);
 461  568                  }
 462  569          } else {
 463  570                  args->auxsize = 0;
 464  571          }
 465  572  
 466  573          /*
 467  574           * If this binary is using an emulator, we need to add an
 468  575           * AT_SUN_EMULATOR aux entry.
 469  576           */
 470  577          if (args->emulator != NULL)
 471  578                  args->auxsize += sizeof (aux_entry_t);
 472  579  
 473      -        if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
      580 +        /*
      581 +         * If this is a native binary that's been given a modified interpreter
      582 +         * root, inform it that the native system exists at that root.
      583 +         */
      584 +        if (args->brand_nroot != NULL) {
      585 +                args->auxsize += sizeof (aux_entry_t);
      586 +        }
      587 +
      588 +
      589 +        /*
      590 +         * On supported kernels (x86_64) make room in the auxv for the
      591 +         * AT_SUN_COMMPAGE entry.  This will go unpopulated on i86xpv systems
      592 +         * which do not provide such functionality.
      593 +         */
      594 +#if defined(__amd64)
      595 +        args->auxsize += sizeof (aux_entry_t);
      596 +#endif /* defined(__amd64) */
      597 +
      598 +        /*
      599 +         * If we have user credentials, we'll supply the following entries:
      600 +         *      AT_SUN_UID
      601 +         *      AT_SUN_RUID
      602 +         *      AT_SUN_GID
      603 +         *      AT_SUN_RGID
      604 +         */
      605 +        if (cred != NULL) {
      606 +                args->auxsize += 4 * sizeof (aux_entry_t);
      607 +        }
      608 +
      609 +        if ((*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
 474  610                  branded = 1;
 475  611                  /*
 476      -                 * We will be adding 4 entries to the aux vectors.  One for
 477      -                 * the the brandname and 3 for the brand specific aux vectors.
      612 +                 * We will be adding 5 entries to the aux vectors.  One for
      613 +                 * the brandname and 4 for the brand specific aux vectors.
 478  614                   */
 479      -                args->auxsize += 4 * sizeof (aux_entry_t);
      615 +                args->auxsize += 5 * sizeof (aux_entry_t);
 480  616          }
 481  617  
 482  618          /* Hardware/Software capabilities */
 483  619          if (capphdr != NULL &&
 484  620              (capsize = capphdr->p_filesz) > 0 &&
 485  621              capsize <= 16 * sizeof (*cap)) {
 486  622                  int ncaps = capsize / sizeof (*cap);
 487  623                  Cap *cp;
 488  624  
 489  625                  cap = kmem_alloc(capsize, KM_SLEEP);
 490  626                  if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
 491  627                      capsize, (offset_t)capphdr->p_offset,
 492  628                      UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
 493  629                          uprintf("%s: Cannot read capabilities section\n",
 494  630                              exec_file);
 495  631                          goto out;
 496  632                  }
 497  633                  for (cp = cap; cp < cap + ncaps; cp++) {
 498  634                          if (cp->c_tag == CA_SUNW_SF_1 &&
 499  635                              (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
 500  636                                  if (args->to_model == DATAMODEL_LP64)
 501  637                                          args->addr32 = 1;
 502  638                                  break;
 503  639                          }
 504  640                  }
 505  641          }
 506  642  
 507  643          aux = bigwad->elfargs;
 508  644          /*
 509  645           * Move args to the user's stack.
 510  646           * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
 511  647           */
 512  648          if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
 513  649                  if (error == -1) {
 514  650                          error = ENOEXEC;
 515  651                          goto bad;
 516  652                  }
 517  653                  goto out;
 518  654          }
 519  655          /* we're single threaded after this point */
 520  656  
 521  657          /*
 522  658           * If this is an ET_DYN executable (shared object),
 523  659           * determine its memory size so that mapelfexec() can load it.
 524  660           */
 525  661          if (ehdrp->e_type == ET_DYN)
 526  662                  len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
  
    | 
      ↓ open down ↓ | 
    37 lines elided | 
    
      ↑ open up ↑ | 
  
 527  663          else
 528  664                  len = 0;
 529  665  
 530  666          dtrphdr = NULL;
 531  667  
 532  668          if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr,
 533  669              &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
 534  670              len, execsz, &brksize)) != 0)
 535  671                  goto bad;
 536  672  
      673 +        if (uphdr != NULL) {
      674 +                /*
      675 +                 * Our uphdr has been dynamically allocated if (and only if)
      676 +                 * its program header flags are clear.
      677 +                 */
      678 +                dynuphdr = (uphdr->p_flags == 0);
      679 +        }
      680 +
 537  681          if (uphdr != NULL && dyphdr == NULL)
 538  682                  goto bad;
 539  683  
 540  684          if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
 541  685                  uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
 542  686                  goto bad;
 543  687          }
 544  688  
 545  689          if (dyphdr != NULL) {
 546  690                  size_t          len;
 547  691                  uintptr_t       lddata;
 548  692                  char            *p;
 549  693                  struct vnode    *nvp;
 550  694  
 551      -                dlnsize = dyphdr->p_filesz;
      695 +                dlnsize = dyphdr->p_filesz + nsize;
 552  696  
 553  697                  if (dlnsize > MAXPATHLEN || dlnsize <= 0)
 554  698                          goto bad;
 555  699  
      700 +                if (nsize != 0) {
      701 +                        bcopy(args->brand_nroot, dlnp, nsize - 1);
      702 +                        dlnp[nsize - 1] = '/';
      703 +                }
      704 +
 556  705                  /*
 557  706                   * Read in "interpreter" pathname.
 558  707                   */
 559      -                if ((error = vn_rdwr(UIO_READ, vp, dlnp, dyphdr->p_filesz,
 560      -                    (offset_t)dyphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
 561      -                    CRED(), &resid)) != 0) {
      708 +                if ((error = vn_rdwr(UIO_READ, vp, dlnp + nsize,
      709 +                    dyphdr->p_filesz, (offset_t)dyphdr->p_offset, UIO_SYSSPACE,
      710 +                    0, (rlim64_t)0, CRED(), &resid)) != 0) {
 562  711                          uprintf("%s: Cannot obtain interpreter pathname\n",
 563  712                              exec_file);
 564  713                          goto bad;
 565  714                  }
 566  715  
 567  716                  if (resid != 0 || dlnp[dlnsize - 1] != '\0')
 568  717                          goto bad;
 569  718  
 570  719                  /*
 571  720                   * Search for '$ORIGIN' token in interpreter path.
 572  721                   * If found, expand it.
 573  722                   */
 574  723                  for (p = dlnp; p = strchr(p, '$'); ) {
 575  724                          uint_t  len, curlen;
 576  725                          char    *_ptr;
 577  726  
 578  727                          if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
 579  728                                  continue;
 580  729  
 581  730                          /*
 582  731                           * We don't support $ORIGIN on setid programs to close
 583  732                           * a potential attack vector.
 584  733                           */
 585  734                          if ((setid & EXECSETID_SETID) != 0) {
 586  735                                  error = ENOEXEC;
 587  736                                  goto bad;
 588  737                          }
 589  738  
 590  739                          curlen = 0;
 591  740                          len = p - dlnp - 1;
 592  741                          if (len) {
 593  742                                  bcopy(dlnp, pathbufp, len);
 594  743                                  curlen += len;
 595  744                          }
 596  745                          if (_ptr = strrchr(args->pathname, '/')) {
 597  746                                  len = _ptr - args->pathname;
 598  747                                  if ((curlen + len) > MAXPATHLEN)
 599  748                                          break;
 600  749  
 601  750                                  bcopy(args->pathname, &pathbufp[curlen], len);
 602  751                                  curlen += len;
 603  752                          } else {
 604  753                                  /*
 605  754                                   * executable is a basename found in the
 606  755                                   * current directory.  So - just substitue
 607  756                                   * '.' for ORIGIN.
 608  757                                   */
 609  758                                  pathbufp[curlen] = '.';
 610  759                                  curlen++;
 611  760                          }
 612  761                          p += ORIGIN_STR_SIZE;
 613  762                          len = strlen(p);
 614  763  
 615  764                          if ((curlen + len) > MAXPATHLEN)
 616  765                                  break;
 617  766                          bcopy(p, &pathbufp[curlen], len);
 618  767                          curlen += len;
 619  768                          pathbufp[curlen++] = '\0';
 620  769                          bcopy(pathbufp, dlnp, curlen);
 621  770                  }
 622  771  
 623  772                  /*
 624  773                   * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
 625  774                   * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
 626  775                   * Just in case /usr is not mounted, change it now.
 627  776                   */
 628  777                  if (strcmp(dlnp, USR_LIB_RTLD) == 0)
 629  778                          dlnp += 4;
 630  779                  error = lookupname(dlnp, UIO_SYSSPACE, FOLLOW, NULLVPP, &nvp);
 631  780                  if (error && dlnp != bigwad->dl_name) {
 632  781                          /* new kernel, old user-level */
 633  782                          error = lookupname(dlnp -= 4, UIO_SYSSPACE, FOLLOW,
 634  783                              NULLVPP, &nvp);
 635  784                  }
 636  785                  if (error) {
 637  786                          uprintf("%s: Cannot find %s\n", exec_file, dlnp);
 638  787                          goto bad;
 639  788                  }
 640  789  
 641  790                  /*
 642  791                   * Setup the "aux" vector.
 643  792                   */
 644  793                  if (uphdr) {
 645  794                          if (ehdrp->e_type == ET_DYN) {
 646  795                                  /* don't use the first page */
 647  796                                  bigwad->exenv.ex_brkbase = (caddr_t)PAGESIZE;
 648  797                                  bigwad->exenv.ex_bssbase = (caddr_t)PAGESIZE;
 649  798                          } else {
 650  799                                  bigwad->exenv.ex_bssbase = bssbase;
 651  800                                  bigwad->exenv.ex_brkbase = brkbase;
 652  801                          }
 653  802                          bigwad->exenv.ex_brksize = brksize;
 654  803                          bigwad->exenv.ex_magic = elfmagic;
 655  804                          bigwad->exenv.ex_vp = vp;
 656  805                          setexecenv(&bigwad->exenv);
 657  806  
 658  807                          ADDAUX(aux, AT_PHDR, uphdr->p_vaddr + voffset)
 659  808                          ADDAUX(aux, AT_PHENT, ehdrp->e_phentsize)
 660  809                          ADDAUX(aux, AT_PHNUM, nphdrs)
 661  810                          ADDAUX(aux, AT_ENTRY, ehdrp->e_entry + voffset)
 662  811                  } else {
 663  812                          if ((error = execopen(&vp, &fd)) != 0) {
 664  813                                  VN_RELE(nvp);
 665  814                                  goto bad;
 666  815                          }
 667  816  
 668  817                          ADDAUX(aux, AT_EXECFD, fd)
 669  818                  }
 670  819  
 671  820                  if ((error = execpermissions(nvp, &bigwad->vattr, args)) != 0) {
 672  821                          VN_RELE(nvp);
 673  822                          uprintf("%s: Cannot execute %s\n", exec_file, dlnp);
 674  823                          goto bad;
 675  824                  }
 676  825  
 677  826                  /*
 678  827                   * Now obtain the ELF header along with the entire program
 679  828                   * header contained in "nvp".
 680  829                   */
 681  830                  kmem_free(phdrbase, phdrsize);
 682  831                  phdrbase = NULL;
 683  832                  if ((error = getelfhead(nvp, CRED(), ehdrp, &nshdrs,
 684  833                      &shstrndx, &nphdrs)) != 0 ||
 685  834                      (error = getelfphdr(nvp, CRED(), ehdrp, nphdrs, &phdrbase,
 686  835                      &phdrsize)) != 0) {
 687  836                          VN_RELE(nvp);
 688  837                          uprintf("%s: Cannot read %s\n", exec_file, dlnp);
 689  838                          goto bad;
 690  839                  }
 691  840  
 692  841                  /*
 693  842                   * Determine memory size of the "interpreter's" loadable
 694  843                   * sections.  This size is then used to obtain the virtual
 695  844                   * address of a hole, in the user's address space, large
  
    | 
      ↓ open down ↓ | 
    124 lines elided | 
    
      ↑ open up ↑ | 
  
 696  845                   * enough to map the "interpreter".
 697  846                   */
 698  847                  if ((len = elfsize(ehdrp, nphdrs, phdrbase, &lddata)) == 0) {
 699  848                          VN_RELE(nvp);
 700  849                          uprintf("%s: Nothing to load in %s\n", exec_file, dlnp);
 701  850                          goto bad;
 702  851                  }
 703  852  
 704  853                  dtrphdr = NULL;
 705  854  
 706      -                error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk,
      855 +                error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, NULL, &junk,
 707  856                      &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
 708  857                      execsz, NULL);
      858 +
 709  859                  if (error || junk != NULL) {
 710  860                          VN_RELE(nvp);
 711  861                          uprintf("%s: Cannot map %s\n", exec_file, dlnp);
 712  862                          goto bad;
 713  863                  }
 714  864  
 715  865                  /*
 716  866                   * We use the DTrace program header to initialize the
 717  867                   * architecture-specific user per-LWP location. The dtrace
 718  868                   * fasttrap provider requires ready access to per-LWP scratch
 719  869                   * space. We assume that there is only one such program header
 720  870                   * in the interpreter.
 721  871                   */
 722  872                  if (dtrphdr != NULL &&
 723  873                      dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
 724  874                          VN_RELE(nvp);
  
    | 
      ↓ open down ↓ | 
    6 lines elided | 
    
      ↑ open up ↑ | 
  
 725  875                          uprintf("%s: Bad DTrace phdr in %s\n", exec_file, dlnp);
 726  876                          goto bad;
 727  877                  }
 728  878  
 729  879                  VN_RELE(nvp);
 730  880                  ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata)
 731  881          }
 732  882  
 733  883          if (hasauxv) {
 734  884                  int auxf = AF_SUN_HWCAPVERIFY;
      885 +
 735  886                  /*
 736      -                 * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
      887 +                 * Note: AT_SUN_PLATFORM and AT_RANDOM were filled in via
 737  888                   * exec_args()
 738  889                   */
 739  890                  ADDAUX(aux, AT_BASE, voffset)
 740  891                  ADDAUX(aux, AT_FLAGS, at_flags)
 741  892                  ADDAUX(aux, AT_PAGESZ, PAGESIZE)
 742  893                  /*
 743  894                   * Linker flags. (security)
 744  895                   * p_flag not yet set at this time.
 745  896                   * We rely on gexec() to provide us with the information.
 746  897                   * If the application is set-uid but this is not reflected
 747  898                   * in a mismatch between real/effective uids/gids, then
 748  899                   * don't treat this as a set-uid exec.  So we care about
 749  900                   * the EXECSETID_UGIDS flag but not the ...SETID flag.
 750  901                   */
 751  902                  if ((setid &= ~EXECSETID_SETID) != 0)
 752  903                          auxf |= AF_SUN_SETUGID;
 753  904  
 754  905                  /*
  
    | 
      ↓ open down ↓ | 
    8 lines elided | 
    
      ↑ open up ↑ | 
  
 755  906                   * If we're running a native process from within a branded
 756  907                   * zone under pfexec then we clear the AF_SUN_SETUGID flag so
 757  908                   * that the native ld.so.1 is able to link with the native
 758  909                   * libraries instead of using the brand libraries that are
 759  910                   * installed in the zone.  We only do this for processes
 760  911                   * which we trust because we see they are already running
 761  912                   * under pfexec (where uid != euid).  This prevents a
 762  913                   * malicious user within the zone from crafting a wrapper to
 763  914                   * run native suid commands with unsecure libraries interposed.
 764  915                   */
 765      -                if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
      916 +                if ((*brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
 766  917                      (setid &= ~EXECSETID_SETID) != 0))
 767  918                          auxf &= ~AF_SUN_SETUGID;
 768  919  
 769  920                  /*
 770  921                   * Record the user addr of the auxflags aux vector entry
 771  922                   * since brands may optionally want to manipulate this field.
 772  923                   */
 773  924                  args->auxp_auxflags =
 774  925                      (char *)((char *)args->stackend +
 775  926                      ((char *)&aux->a_type -
 776  927                      (char *)bigwad->elfargs));
 777  928                  ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
      929 +
 778  930                  /*
      931 +                 * Record information about the real and effective user and
      932 +                 * group IDs.
      933 +                 */
      934 +                if (cred != NULL) {
      935 +                        ADDAUX(aux, AT_SUN_UID, crgetuid(cred));
      936 +                        ADDAUX(aux, AT_SUN_RUID, crgetruid(cred));
      937 +                        ADDAUX(aux, AT_SUN_GID, crgetgid(cred));
      938 +                        ADDAUX(aux, AT_SUN_RGID, crgetrgid(cred));
      939 +                }
      940 +
      941 +                /*
 779  942                   * Hardware capability flag word (performance hints)
 780  943                   * Used for choosing faster library routines.
 781  944                   * (Potentially different between 32-bit and 64-bit ABIs)
 782  945                   */
 783  946  #if defined(_LP64)
 784  947                  if (args->to_model == DATAMODEL_NATIVE) {
 785  948                          ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 786  949                          ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
 787  950                  } else {
 788  951                          ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
 789  952                          ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
 790  953                  }
 791  954  #else
 792  955                  ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 793  956                  ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
 794  957  #endif
 795  958                  if (branded) {
 796  959                          /*
  
    | 
      ↓ open down ↓ | 
    8 lines elided | 
    
      ↑ open up ↑ | 
  
 797  960                           * Reserve space for the brand-private aux vectors,
 798  961                           * and record the user addr of that space.
 799  962                           */
 800  963                          args->auxp_brand =
 801  964                              (char *)((char *)args->stackend +
 802  965                              ((char *)&aux->a_type -
 803  966                              (char *)bigwad->elfargs));
 804  967                          ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
 805  968                          ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
 806  969                          ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
      970 +                        ADDAUX(aux, AT_SUN_BRAND_AUX4, 0)
 807  971                  }
 808  972  
      973 +                /*
      974 +                 * Add the comm page auxv entry, mapping it in if needed.
      975 +                 */
      976 +#if defined(__amd64)
      977 +                if (args->commpage != NULL ||
      978 +                    (args->commpage = (uintptr_t)comm_page_mapin()) != NULL) {
      979 +                        ADDAUX(aux, AT_SUN_COMMPAGE, args->commpage)
      980 +                } else {
      981 +                        /*
      982 +                         * If the comm page cannot be mapped, pad out the auxv
      983 +                         * to satisfy later size checks.
      984 +                         */
      985 +                        ADDAUX(aux, AT_NULL, 0)
      986 +                }
      987 +#endif /* defined(__amd64) */
      988 +
 809  989                  ADDAUX(aux, AT_NULL, 0)
 810  990                  postfixsize = (char *)aux - (char *)bigwad->elfargs;
 811  991  
 812  992                  /*
 813  993                   * We make assumptions above when we determine how many aux
 814  994                   * vector entries we will be adding. However, if we have an
 815  995                   * invalid elf file, it is possible that mapelfexec might
 816  996                   * behave differently (but not return an error), in which case
 817  997                   * the number of aux entries we actually add will be different.
 818  998                   * We detect that now and error out.
 819  999                   */
 820 1000                  if (postfixsize != args->auxsize) {
 821 1001                          DTRACE_PROBE2(elfexec_badaux, int, postfixsize,
 822 1002                              int, args->auxsize);
 823 1003                          goto bad;
 824 1004                  }
 825 1005                  ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
 826 1006          }
 827 1007  
 828 1008          /*
 829 1009           * For the 64-bit kernel, the limit is big enough that rounding it up
 830 1010           * to a page can overflow the 64-bit limit, so we check for btopr()
 831 1011           * overflowing here by comparing it with the unrounded limit in pages.
 832 1012           * If it hasn't overflowed, compare the exec size with the rounded up
 833 1013           * limit in pages.  Otherwise, just compare with the unrounded limit.
 834 1014           */
 835 1015          limit = btop(p->p_vmem_ctl);
 836 1016          roundlimit = btopr(p->p_vmem_ctl);
 837 1017          if ((roundlimit > limit && *execsz > roundlimit) ||
  
    | 
      ↓ open down ↓ | 
    19 lines elided | 
    
      ↑ open up ↑ | 
  
 838 1018              (roundlimit < limit && *execsz > limit)) {
 839 1019                  mutex_enter(&p->p_lock);
 840 1020                  (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
 841 1021                      RCA_SAFE);
 842 1022                  mutex_exit(&p->p_lock);
 843 1023                  error = ENOMEM;
 844 1024                  goto bad;
 845 1025          }
 846 1026  
 847 1027          bzero(up->u_auxv, sizeof (up->u_auxv));
     1028 +        up->u_commpagep = args->commpage;
 848 1029          if (postfixsize) {
 849 1030                  int num_auxv;
 850 1031  
 851 1032                  /*
 852 1033                   * Copy the aux vector to the user stack.
 853 1034                   */
 854 1035                  error = execpoststack(args, bigwad->elfargs, postfixsize);
 855 1036                  if (error)
 856 1037                          goto bad;
 857 1038  
 858 1039                  /*
 859 1040                   * Copy auxv to the process's user structure for use by /proc.
 860 1041                   * If this is a branded process, the brand's exec routine will
 861 1042                   * copy its private entries to the user structure later. It
 862 1043                   * relies on the fact that the blank entries are at the end.
 863 1044                   */
 864 1045                  num_auxv = postfixsize / sizeof (aux_entry_t);
 865 1046                  ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
 866 1047                  aux = bigwad->elfargs;
 867 1048                  for (i = 0; i < num_auxv; i++) {
 868 1049                          up->u_auxv[i].a_type = aux[i].a_type;
 869 1050                          up->u_auxv[i].a_un.a_val = (aux_val_t)aux[i].a_un.a_val;
 870 1051                  }
 871 1052          }
 872 1053  
 873 1054          /*
 874 1055           * Pass back the starting address so we can set the program counter.
 875 1056           */
 876 1057          args->entry = (uintptr_t)(ehdrp->e_entry + voffset);
 877 1058  
 878 1059          if (!uphdr) {
 879 1060                  if (ehdrp->e_type == ET_DYN) {
 880 1061                          /*
 881 1062                           * If we are executing a shared library which doesn't
 882 1063                           * have an interpreter (probably ld.so.1) then
 883 1064                           * we don't set the brkbase now.  Instead we
 884 1065                           * delay its setting until the first call
 885 1066                           * via grow.c::brk().  This permits ld.so.1 to
 886 1067                           * initialize brkbase to the tail of the executable it
 887 1068                           * loads (which is where it needs to be).
 888 1069                           */
 889 1070                          bigwad->exenv.ex_brkbase = (caddr_t)0;
 890 1071                          bigwad->exenv.ex_bssbase = (caddr_t)0;
 891 1072                          bigwad->exenv.ex_brksize = 0;
 892 1073                  } else {
 893 1074                          bigwad->exenv.ex_brkbase = brkbase;
 894 1075                          bigwad->exenv.ex_bssbase = bssbase;
 895 1076                          bigwad->exenv.ex_brksize = brksize;
 896 1077                  }
 897 1078                  bigwad->exenv.ex_magic = elfmagic;
 898 1079                  bigwad->exenv.ex_vp = vp;
 899 1080                  setexecenv(&bigwad->exenv);
 900 1081          }
 901 1082  
 902 1083          ASSERT(error == 0);
 903 1084          goto out;
  
    | 
      ↓ open down ↓ | 
    46 lines elided | 
    
      ↑ open up ↑ | 
  
 904 1085  
 905 1086  bad:
 906 1087          if (fd != -1)           /* did we open the a.out yet */
 907 1088                  (void) execclose(fd);
 908 1089  
 909 1090          psignal(p, SIGKILL);
 910 1091  
 911 1092          if (error == 0)
 912 1093                  error = ENOEXEC;
 913 1094  out:
     1095 +        if (dynuphdr)
     1096 +                kmem_free(uphdr, sizeof (Phdr));
 914 1097          if (phdrbase != NULL)
 915 1098                  kmem_free(phdrbase, phdrsize);
 916 1099          if (cap != NULL)
 917 1100                  kmem_free(cap, capsize);
 918 1101          kmem_free(bigwad, sizeof (struct bigwad));
 919 1102          return (error);
 920 1103  }
 921 1104  
 922 1105  /*
 923 1106   * Compute the memory size requirement for the ELF file.
 924 1107   */
 925 1108  static size_t
 926 1109  elfsize(Ehdr *ehdrp, int nphdrs, caddr_t phdrbase, uintptr_t *lddata)
 927 1110  {
 928 1111          size_t  len;
 929 1112          Phdr    *phdrp = (Phdr *)phdrbase;
 930 1113          int     hsize = ehdrp->e_phentsize;
 931 1114          int     first = 1;
 932 1115          int     dfirst = 1;     /* first data segment */
 933 1116          uintptr_t loaddr = 0;
 934 1117          uintptr_t hiaddr = 0;
 935 1118          uintptr_t lo, hi;
 936 1119          int     i;
 937 1120  
 938 1121          for (i = nphdrs; i > 0; i--) {
 939 1122                  if (phdrp->p_type == PT_LOAD) {
 940 1123                          lo = phdrp->p_vaddr;
 941 1124                          hi = lo + phdrp->p_memsz;
 942 1125                          if (first) {
 943 1126                                  loaddr = lo;
 944 1127                                  hiaddr = hi;
 945 1128                                  first = 0;
 946 1129                          } else {
 947 1130                                  if (loaddr > lo)
 948 1131                                          loaddr = lo;
 949 1132                                  if (hiaddr < hi)
 950 1133                                          hiaddr = hi;
 951 1134                          }
 952 1135  
 953 1136                          /*
 954 1137                           * save the address of the first data segment
 955 1138                           * of an object - used for the AT_SUN_LDDATA
 956 1139                           * aux entry.
 957 1140                           */
 958 1141                          if ((lddata != NULL) && dfirst &&
 959 1142                              (phdrp->p_flags & PF_W)) {
 960 1143                                  *lddata = lo;
 961 1144                                  dfirst = 0;
 962 1145                          }
 963 1146                  }
 964 1147                  phdrp = (Phdr *)((caddr_t)phdrp + hsize);
 965 1148          }
 966 1149  
 967 1150          len = hiaddr - (loaddr & PAGEMASK);
 968 1151          len = roundup(len, PAGESIZE);
 969 1152  
 970 1153          return (len);
 971 1154  }
 972 1155  
 973 1156  /*
 974 1157   * Read in the ELF header and program header table.
 975 1158   * SUSV3 requires:
 976 1159   *      ENOEXEC File format is not recognized
 977 1160   *      EINVAL  Format recognized but execution not supported
 978 1161   */
 979 1162  static int
 980 1163  getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, int *nshdrs, int *shstrndx,
 981 1164      int *nphdrs)
 982 1165  {
 983 1166          int error;
 984 1167          ssize_t resid;
 985 1168  
 986 1169          /*
 987 1170           * We got here by the first two bytes in ident,
 988 1171           * now read the entire ELF header.
 989 1172           */
 990 1173          if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr,
 991 1174              sizeof (Ehdr), (offset_t)0, UIO_SYSSPACE, 0,
 992 1175              (rlim64_t)0, credp, &resid)) != 0)
 993 1176                  return (error);
 994 1177  
 995 1178          /*
 996 1179           * Since a separate version is compiled for handling 32-bit and
 997 1180           * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
 998 1181           * doesn't need to be able to deal with 32-bit ELF files.
 999 1182           */
1000 1183          if (resid != 0 ||
1001 1184              ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
1002 1185              ehdr->e_ident[EI_MAG3] != ELFMAG3)
1003 1186                  return (ENOEXEC);
1004 1187  
1005 1188          if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ||
1006 1189  #if defined(_ILP32) || defined(_ELF32_COMPAT)
1007 1190              ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
1008 1191  #else
1009 1192              ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
1010 1193  #endif
1011 1194              !elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine,
1012 1195              ehdr->e_flags))
1013 1196                  return (EINVAL);
1014 1197  
1015 1198          *nshdrs = ehdr->e_shnum;
1016 1199          *shstrndx = ehdr->e_shstrndx;
1017 1200          *nphdrs = ehdr->e_phnum;
1018 1201  
1019 1202          /*
1020 1203           * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
1021 1204           * to read in the section header at index zero to access the true
1022 1205           * values for those fields.
1023 1206           */
1024 1207          if ((*nshdrs == 0 && ehdr->e_shoff != 0) ||
1025 1208              *shstrndx == SHN_XINDEX || *nphdrs == PN_XNUM) {
1026 1209                  Shdr shdr;
1027 1210  
1028 1211                  if (ehdr->e_shoff == 0)
1029 1212                          return (EINVAL);
1030 1213  
1031 1214                  if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr,
1032 1215                      sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0,
1033 1216                      (rlim64_t)0, credp, &resid)) != 0)
1034 1217                          return (error);
1035 1218  
1036 1219                  if (*nshdrs == 0)
1037 1220                          *nshdrs = shdr.sh_size;
1038 1221                  if (*shstrndx == SHN_XINDEX)
1039 1222                          *shstrndx = shdr.sh_link;
1040 1223                  if (*nphdrs == PN_XNUM && shdr.sh_info != 0)
1041 1224                          *nphdrs = shdr.sh_info;
1042 1225          }
1043 1226  
1044 1227          return (0);
1045 1228  }
1046 1229  
1047 1230  #ifdef _ELF32_COMPAT
1048 1231  extern size_t elf_nphdr_max;
1049 1232  #else
1050 1233  size_t elf_nphdr_max = 1000;
1051 1234  #endif
1052 1235  
1053 1236  static int
1054 1237  getelfphdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, int nphdrs,
1055 1238      caddr_t *phbasep, ssize_t *phsizep)
1056 1239  {
1057 1240          ssize_t resid, minsize;
1058 1241          int err;
1059 1242  
1060 1243          /*
1061 1244           * Since we're going to be using e_phentsize to iterate down the
1062 1245           * array of program headers, it must be 8-byte aligned or else
1063 1246           * we might cause a misaligned access. We use all members through
1064 1247           * p_flags on 32-bit ELF files and p_memsz on 64-bit ELF files so
1065 1248           * e_phentsize must be at least large enough to include those
1066 1249           * members.
1067 1250           */
1068 1251  #if !defined(_LP64) || defined(_ELF32_COMPAT)
1069 1252          minsize = offsetof(Phdr, p_flags) + sizeof (((Phdr *)NULL)->p_flags);
1070 1253  #else
1071 1254          minsize = offsetof(Phdr, p_memsz) + sizeof (((Phdr *)NULL)->p_memsz);
1072 1255  #endif
1073 1256          if (ehdr->e_phentsize < minsize || (ehdr->e_phentsize & 3))
1074 1257                  return (EINVAL);
1075 1258  
1076 1259          *phsizep = nphdrs * ehdr->e_phentsize;
1077 1260  
1078 1261          if (*phsizep > sizeof (Phdr) * elf_nphdr_max) {
1079 1262                  if ((*phbasep = kmem_alloc(*phsizep, KM_NOSLEEP)) == NULL)
1080 1263                          return (ENOMEM);
1081 1264          } else {
1082 1265                  *phbasep = kmem_alloc(*phsizep, KM_SLEEP);
1083 1266          }
1084 1267  
1085 1268          if ((err = vn_rdwr(UIO_READ, vp, *phbasep, *phsizep,
1086 1269              (offset_t)ehdr->e_phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1087 1270              credp, &resid)) != 0) {
1088 1271                  kmem_free(*phbasep, *phsizep);
1089 1272                  *phbasep = NULL;
1090 1273                  return (err);
1091 1274          }
1092 1275  
1093 1276          return (0);
1094 1277  }
1095 1278  
1096 1279  #ifdef _ELF32_COMPAT
1097 1280  extern size_t elf_nshdr_max;
1098 1281  extern size_t elf_shstrtab_max;
1099 1282  #else
1100 1283  size_t elf_nshdr_max = 10000;
1101 1284  size_t elf_shstrtab_max = 100 * 1024;
1102 1285  #endif
1103 1286  
1104 1287  
1105 1288  static int
1106 1289  getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr,
1107 1290      int nshdrs, int shstrndx, caddr_t *shbasep, ssize_t *shsizep,
1108 1291      char **shstrbasep, ssize_t *shstrsizep)
1109 1292  {
1110 1293          ssize_t resid, minsize;
1111 1294          int err;
1112 1295          Shdr *shdr;
1113 1296  
1114 1297          /*
1115 1298           * Since we're going to be using e_shentsize to iterate down the
1116 1299           * array of section headers, it must be 8-byte aligned or else
1117 1300           * we might cause a misaligned access. We use all members through
1118 1301           * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
1119 1302           * must be at least large enough to include that member. The index
1120 1303           * of the string table section must also be valid.
1121 1304           */
1122 1305          minsize = offsetof(Shdr, sh_entsize) + sizeof (shdr->sh_entsize);
1123 1306          if (ehdr->e_shentsize < minsize || (ehdr->e_shentsize & 3) ||
1124 1307              shstrndx >= nshdrs)
1125 1308                  return (EINVAL);
1126 1309  
1127 1310          *shsizep = nshdrs * ehdr->e_shentsize;
1128 1311  
1129 1312          if (*shsizep > sizeof (Shdr) * elf_nshdr_max) {
1130 1313                  if ((*shbasep = kmem_alloc(*shsizep, KM_NOSLEEP)) == NULL)
1131 1314                          return (ENOMEM);
1132 1315          } else {
1133 1316                  *shbasep = kmem_alloc(*shsizep, KM_SLEEP);
1134 1317          }
1135 1318  
1136 1319          if ((err = vn_rdwr(UIO_READ, vp, *shbasep, *shsizep,
1137 1320              (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1138 1321              credp, &resid)) != 0) {
1139 1322                  kmem_free(*shbasep, *shsizep);
1140 1323                  return (err);
1141 1324          }
1142 1325  
1143 1326          /*
1144 1327           * Pull the section string table out of the vnode; fail if the size
1145 1328           * is zero.
1146 1329           */
1147 1330          shdr = (Shdr *)(*shbasep + shstrndx * ehdr->e_shentsize);
1148 1331          if ((*shstrsizep = shdr->sh_size) == 0) {
1149 1332                  kmem_free(*shbasep, *shsizep);
1150 1333                  return (EINVAL);
1151 1334          }
1152 1335  
1153 1336          if (*shstrsizep > elf_shstrtab_max) {
1154 1337                  if ((*shstrbasep = kmem_alloc(*shstrsizep,
1155 1338                      KM_NOSLEEP)) == NULL) {
1156 1339                          kmem_free(*shbasep, *shsizep);
1157 1340                          return (ENOMEM);
1158 1341                  }
1159 1342          } else {
1160 1343                  *shstrbasep = kmem_alloc(*shstrsizep, KM_SLEEP);
1161 1344          }
1162 1345  
1163 1346          if ((err = vn_rdwr(UIO_READ, vp, *shstrbasep, *shstrsizep,
1164 1347              (offset_t)shdr->sh_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
1165 1348              credp, &resid)) != 0) {
1166 1349                  kmem_free(*shbasep, *shsizep);
1167 1350                  kmem_free(*shstrbasep, *shstrsizep);
1168 1351                  return (err);
1169 1352          }
  
    | 
      ↓ open down ↓ | 
    246 lines elided | 
    
      ↑ open up ↑ | 
  
1170 1353  
1171 1354          /*
1172 1355           * Make sure the strtab is null-terminated to make sure we
1173 1356           * don't run off the end of the table.
1174 1357           */
1175 1358          (*shstrbasep)[*shstrsizep - 1] = '\0';
1176 1359  
1177 1360          return (0);
1178 1361  }
1179 1362  
     1363 +
     1364 +#ifdef _ELF32_COMPAT
     1365 +int
     1366 +elf32readhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
     1367 +    caddr_t *phbasep, ssize_t *phsizep)
     1368 +#else
     1369 +int
     1370 +elfreadhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
     1371 +    caddr_t *phbasep, ssize_t *phsizep)
     1372 +#endif
     1373 +{
     1374 +        int error, nshdrs, shstrndx;
     1375 +
     1376 +        if ((error = getelfhead(vp, credp, ehdrp, &nshdrs, &shstrndx,
     1377 +            nphdrs)) != 0 ||
     1378 +            (error = getelfphdr(vp, credp, ehdrp, *nphdrs, phbasep,
     1379 +            phsizep)) != 0) {
     1380 +                return (error);
     1381 +        }
     1382 +        return (0);
     1383 +}
     1384 +
     1385 +
1180 1386  static int
1181 1387  mapelfexec(
1182 1388          vnode_t *vp,
1183 1389          Ehdr *ehdr,
1184 1390          int nphdrs,
1185 1391          caddr_t phdrbase,
1186 1392          Phdr **uphdr,
1187 1393          Phdr **dyphdr,
1188 1394          Phdr **stphdr,
1189 1395          Phdr **dtphdr,
1190 1396          Phdr *dataphdrp,
1191 1397          caddr_t *bssbase,
1192 1398          caddr_t *brkbase,
1193 1399          intptr_t *voffset,
1194 1400          intptr_t *minaddr,
1195 1401          size_t len,
1196 1402          long *execsz,
1197 1403          size_t *brksize)
1198 1404  {
1199 1405          Phdr *phdr;
1200      -        int i, prot, error;
     1406 +        int i, prot, error, lastprot = 0;
1201 1407          caddr_t addr = NULL;
1202 1408          size_t zfodsz;
1203 1409          int ptload = 0;
1204 1410          int page;
1205 1411          off_t offset;
1206 1412          int hsize = ehdr->e_phentsize;
1207 1413          caddr_t mintmp = (caddr_t)-1;
     1414 +        uintptr_t lastaddr = NULL;
1208 1415          extern int use_brk_lpg;
1209 1416  
1210 1417          if (ehdr->e_type == ET_DYN) {
1211      -                /*
1212      -                 * Obtain the virtual address of a hole in the
1213      -                 * address space to map the "interpreter".
1214      -                 */
1215      -                map_addr(&addr, len, (offset_t)0, 1, 0);
1216      -                if (addr == NULL)
1217      -                        return (ENOMEM);
1218      -                *voffset = (intptr_t)addr;
     1418 +                caddr_t vaddr;
1219 1419  
1220 1420                  /*
1221      -                 * Calculate the minimum vaddr so it can be subtracted out.
1222      -                 * According to the ELF specification, since PT_LOAD sections
1223      -                 * must be sorted by increasing p_vaddr values, this is
1224      -                 * guaranteed to be the first PT_LOAD section.
     1421 +                 * Despite the fact that mmapobj(2) refuses to load them, we
     1422 +                 * need to support executing ET_DYN objects that have a
     1423 +                 * non-NULL p_vaddr.  When found in the wild, these objects
     1424 +                 * are likely to be due to an old (and largely obviated) Linux
     1425 +                 * facility, prelink(8), that rewrites shared objects to
     1426 +                 * prefer specific (disjoint) virtual address ranges.  (Yes,
     1427 +                 * this is putatively for performance -- and yes, it has
     1428 +                 * limited applicability, many edge conditions and grisly
     1429 +                 * failure modes; even for Linux, it's insane.)  As ELF
     1430 +                 * mandates that the PT_LOAD segments be in p_vaddr order, we
     1431 +                 * find the lowest p_vaddr by finding the first PT_LOAD
     1432 +                 * segment.
1225 1433                   */
1226 1434                  phdr = (Phdr *)phdrbase;
1227 1435                  for (i = nphdrs; i > 0; i--) {
1228 1436                          if (phdr->p_type == PT_LOAD) {
1229      -                                *voffset -= (uintptr_t)phdr->p_vaddr;
     1437 +                                addr = (caddr_t)(uintptr_t)phdr->p_vaddr;
1230 1438                                  break;
1231 1439                          }
1232 1440                          phdr = (Phdr *)((caddr_t)phdr + hsize);
1233 1441                  }
1234 1442  
     1443 +                /*
     1444 +                 * We have a non-zero p_vaddr in the first PT_LOAD segment --
     1445 +                 * presumably because we're directly executing a prelink(8)'d
     1446 +                 * ld-linux.so.  While we could correctly execute such an
     1447 +                 * object without locating it at its desired p_vaddr (it is,
     1448 +                 * after all, still relocatable), our inner antiquarian
     1449 +                 * derives a perverse pleasure in accommodating the steampunk
     1450 +                 * prelink(8) contraption -- goggles on!
     1451 +                 */
     1452 +                if ((vaddr = addr) != NULL) {
     1453 +                        if (as_gap(curproc->p_as, len,
     1454 +                            &addr, &len, AH_LO, NULL) == -1 || addr != vaddr) {
     1455 +                                addr = NULL;
     1456 +                        }
     1457 +                }
     1458 +
     1459 +                if (addr == NULL) {
     1460 +                        /*
     1461 +                         * We either have a NULL p_vaddr (the common case, by
     1462 +                         * many orders of magnitude) or we have a non-NULL
     1463 +                         * p_vaddr and we were unable to obtain the specified
     1464 +                         * VA range (presumably because it's an illegal
     1465 +                         * address).  Either way, obtain an address in which
     1466 +                         * to map the interpreter.
     1467 +                         */
     1468 +                        map_addr(&addr, len, (offset_t)0, 1, 0);
     1469 +                        if (addr == NULL)
     1470 +                                return (ENOMEM);
     1471 +                }
     1472 +
     1473 +                /*
     1474 +                 * Our voffset is the difference between where we landed and
     1475 +                 * where we wanted to be.
     1476 +                 */
     1477 +                *voffset = (uintptr_t)addr - (uintptr_t)vaddr;
1235 1478          } else {
1236 1479                  *voffset = 0;
1237 1480          }
     1481 +
1238 1482          phdr = (Phdr *)phdrbase;
1239 1483          for (i = nphdrs; i > 0; i--) {
1240 1484                  switch (phdr->p_type) {
1241 1485                  case PT_LOAD:
1242      -                        if ((*dyphdr != NULL) && (*uphdr == NULL))
1243      -                                return (0);
1244      -
1245 1486                          ptload = 1;
1246 1487                          prot = PROT_USER;
1247 1488                          if (phdr->p_flags & PF_R)
1248 1489                                  prot |= PROT_READ;
1249 1490                          if (phdr->p_flags & PF_W)
1250 1491                                  prot |= PROT_WRITE;
1251 1492                          if (phdr->p_flags & PF_X)
1252 1493                                  prot |= PROT_EXEC;
1253 1494  
1254 1495                          addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
1255 1496  
     1497 +                        if ((*dyphdr != NULL) && uphdr != NULL &&
     1498 +                            (*uphdr == NULL)) {
     1499 +                                /*
     1500 +                                 * The PT_PHDR program header is, strictly
     1501 +                                 * speaking, optional.  If we find that this
     1502 +                                 * is missing, we will determine the location
     1503 +                                 * of the program headers based on the address
     1504 +                                 * of the lowest PT_LOAD segment (namely, this
     1505 +                                 * one):  we subtract the p_offset to get to
     1506 +                                 * the ELF header and then add back the program
     1507 +                                 * header offset to get to the program headers.
     1508 +                                 * We then cons up a Phdr that corresponds to
     1509 +                                 * the (missing) PT_PHDR, setting the flags
     1510 +                                 * to 0 to denote that this is artificial and
     1511 +                                 * should (must) be freed by the caller.
     1512 +                                 */
     1513 +                                Phdr *cons;
     1514 +
     1515 +                                cons = kmem_zalloc(sizeof (Phdr), KM_SLEEP);
     1516 +
     1517 +                                cons->p_flags = 0;
     1518 +                                cons->p_type = PT_PHDR;
     1519 +                                cons->p_vaddr = ((uintptr_t)addr -
     1520 +                                    phdr->p_offset) + ehdr->e_phoff;
     1521 +
     1522 +                                *uphdr = cons;
     1523 +                        }
     1524 +
1256 1525                          /*
1257 1526                           * Keep track of the segment with the lowest starting
1258 1527                           * address.
1259 1528                           */
1260 1529                          if (addr < mintmp)
1261 1530                                  mintmp = addr;
1262 1531  
     1532 +                        /*
     1533 +                         * Segments need not correspond to page boundaries:
     1534 +                         * they are permitted to share a page.  If two PT_LOAD
     1535 +                         * segments share the same page, and the permissions
     1536 +                         * of the segments differ, the behavior is historically
     1537 +                         * that the permissions of the latter segment are used
     1538 +                         * for the page that the two segments share.  This is
     1539 +                         * also historically a non-issue:  binaries generated
     1540 +                         * by most anything will make sure that two PT_LOAD
     1541 +                         * segments with differing permissions don't actually
     1542 +                         * share any pages.  However, there exist some crazy
     1543 +                         * things out there (including at least an obscure
     1544 +                         * Portuguese teaching language called G-Portugol) that
     1545 +                         * actually do the wrong thing and expect it to work:
     1546 +                         * they have a segment with execute permission share
     1547 +                         * a page with a subsequent segment that does not
     1548 +                         * have execute permissions and expect the resulting
     1549 +                         * shared page to in fact be executable.  To accommodate
     1550 +                         * such broken link editors, we take advantage of a
     1551 +                         * latitude explicitly granted to the loader:  it is
     1552 +                         * permitted to make _any_ PT_LOAD segment executable
     1553 +                         * (provided that it is readable or writable).  If we
     1554 +                         * see that we're sharing a page and that the previous
     1555 +                         * page was executable, we will add execute permissions
     1556 +                         * to our segment.
     1557 +                         */
     1558 +                        if (btop(lastaddr) == btop((uintptr_t)addr) &&
     1559 +                            (phdr->p_flags & (PF_R | PF_W)) &&
     1560 +                            (lastprot & PROT_EXEC)) {
     1561 +                                prot |= PROT_EXEC;
     1562 +                        }
     1563 +
     1564 +                        lastaddr = (uintptr_t)addr + phdr->p_filesz;
     1565 +                        lastprot = prot;
     1566 +
1263 1567                          zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
1264 1568  
1265 1569                          offset = phdr->p_offset;
1266 1570                          if (((uintptr_t)offset & PAGEOFFSET) ==
1267 1571                              ((uintptr_t)addr & PAGEOFFSET) &&
1268 1572                              (!(vp->v_flag & VNOMAP))) {
1269 1573                                  page = 1;
1270 1574                          } else {
1271 1575                                  page = 0;
1272 1576                          }
1273 1577  
1274 1578                          /*
1275 1579                           * Set the heap pagesize for OOB when the bss size
1276 1580                           * is known and use_brk_lpg is not 0.
1277 1581                           */
1278 1582                          if (brksize != NULL && use_brk_lpg &&
1279 1583                              zfodsz != 0 && phdr == dataphdrp &&
1280 1584                              (prot & PROT_WRITE)) {
1281 1585                                  size_t tlen = P2NPHASE((uintptr_t)addr +
1282 1586                                      phdr->p_filesz, PAGESIZE);
1283 1587  
1284 1588                                  if (zfodsz > tlen) {
1285 1589                                          curproc->p_brkpageszc =
1286 1590                                              page_szc(map_pgsz(MAPPGSZ_HEAP,
1287 1591                                              curproc, addr + phdr->p_filesz +
1288 1592                                              tlen, zfodsz - tlen, 0));
1289 1593                                  }
1290 1594                          }
1291 1595  
1292 1596                          if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
1293 1597                              (prot & PROT_WRITE)) {
1294 1598                                  uint_t  szc = curproc->p_brkpageszc;
1295 1599                                  size_t pgsz = page_get_pagesize(szc);
1296 1600                                  caddr_t ebss = addr + phdr->p_memsz;
1297 1601                                  size_t extra_zfodsz;
1298 1602  
1299 1603                                  ASSERT(pgsz > PAGESIZE);
1300 1604  
1301 1605                                  extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
1302 1606  
1303 1607                                  if (error = execmap(vp, addr, phdr->p_filesz,
1304 1608                                      zfodsz + extra_zfodsz, phdr->p_offset,
1305 1609                                      prot, page, szc))
1306 1610                                          goto bad;
1307 1611                                  if (brksize != NULL)
1308 1612                                          *brksize = extra_zfodsz;
1309 1613                          } else {
1310 1614                                  if (error = execmap(vp, addr, phdr->p_filesz,
1311 1615                                      zfodsz, phdr->p_offset, prot, page, 0))
1312 1616                                          goto bad;
1313 1617                          }
1314 1618  
1315 1619                          if (bssbase != NULL && addr >= *bssbase &&
1316 1620                              phdr == dataphdrp) {
  
    | 
      ↓ open down ↓ | 
    44 lines elided | 
    
      ↑ open up ↑ | 
  
1317 1621                                  *bssbase = addr + phdr->p_filesz;
1318 1622                          }
1319 1623                          if (brkbase != NULL && addr >= *brkbase) {
1320 1624                                  *brkbase = addr + phdr->p_memsz;
1321 1625                          }
1322 1626  
1323 1627                          *execsz += btopr(phdr->p_memsz);
1324 1628                          break;
1325 1629  
1326 1630                  case PT_INTERP:
1327      -                        if (ptload)
1328      -                                goto bad;
     1631 +                        /*
     1632 +                         * The ELF specification is unequivocal about the
     1633 +                         * PT_INTERP program header with respect to any PT_LOAD
     1634 +                         * program header:  "If it is present, it must precede
     1635 +                         * any loadable segment entry." Linux, however, makes
     1636 +                         * no attempt to enforce this -- which has allowed some
     1637 +                         * binary editing tools to get away with generating
     1638 +                         * invalid ELF binaries in the respect that PT_INTERP
     1639 +                         * occurs after the first PT_LOAD program header.  This
     1640 +                         * is unfortunate (and of course, disappointing) but
     1641 +                         * it's no worse than that: there is no reason that we
     1642 +                         * can't process the PT_INTERP entry (if present) after
     1643 +                         * one or more PT_LOAD entries.  We therefore
     1644 +                         * deliberately do not check ptload here and always
     1645 +                         * store dyphdr to be the PT_INTERP program header.
     1646 +                         */
1329 1647                          *dyphdr = phdr;
1330 1648                          break;
1331 1649  
1332 1650                  case PT_SHLIB:
1333 1651                          *stphdr = phdr;
1334 1652                          break;
1335 1653  
1336 1654                  case PT_PHDR:
1337      -                        if (ptload)
     1655 +                        if (ptload || phdr->p_flags == 0)
1338 1656                                  goto bad;
1339      -                        *uphdr = phdr;
     1657 +
     1658 +                        if (uphdr != NULL)
     1659 +                                *uphdr = phdr;
     1660 +
1340 1661                          break;
1341 1662  
1342 1663                  case PT_NULL:
1343 1664                  case PT_DYNAMIC:
1344 1665                  case PT_NOTE:
1345 1666                          break;
1346 1667  
1347 1668                  case PT_SUNWDTRACE:
1348 1669                          if (dtphdr != NULL)
1349 1670                                  *dtphdr = phdr;
1350 1671                          break;
1351 1672  
1352 1673                  default:
1353 1674                          break;
1354 1675                  }
1355 1676                  phdr = (Phdr *)((caddr_t)phdr + hsize);
1356 1677          }
1357 1678  
1358 1679          if (minaddr != NULL) {
1359 1680                  ASSERT(mintmp != (caddr_t)-1);
1360 1681                  *minaddr = (intptr_t)mintmp;
1361 1682          }
1362 1683  
1363 1684          return (0);
1364 1685  bad:
1365 1686          if (error == 0)
1366 1687                  error = EINVAL;
1367 1688          return (error);
1368 1689  }
1369 1690  
1370 1691  int
1371 1692  elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1372 1693      rlim64_t rlimit, cred_t *credp)
1373 1694  {
1374 1695          Note note;
1375 1696          int error;
1376 1697  
1377 1698          bzero(&note, sizeof (note));
1378 1699          bcopy("CORE", note.name, 4);
1379 1700          note.nhdr.n_type = type;
1380 1701          /*
1381 1702           * The System V ABI states that n_namesz must be the length of the
1382 1703           * string that follows the Nhdr structure including the terminating
1383 1704           * null. The ABI also specifies that sufficient padding should be
1384 1705           * included so that the description that follows the name string
1385 1706           * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
1386 1707           * respectively. However, since this change was not made correctly
1387 1708           * at the time of the 64-bit port, both 32- and 64-bit binaries
1388 1709           * descriptions are only guaranteed to begin on a 4-byte boundary.
1389 1710           */
1390 1711          note.nhdr.n_namesz = 5;
1391 1712          note.nhdr.n_descsz = roundup(descsz, sizeof (Word));
1392 1713  
1393 1714          if (error = core_write(vp, UIO_SYSSPACE, *offsetp, &note,
1394 1715              sizeof (note), rlimit, credp))
1395 1716                  return (error);
1396 1717  
1397 1718          *offsetp += sizeof (note);
1398 1719  
1399 1720          if (error = core_write(vp, UIO_SYSSPACE, *offsetp, desc,
1400 1721              note.nhdr.n_descsz, rlimit, credp))
1401 1722                  return (error);
1402 1723  
1403 1724          *offsetp += note.nhdr.n_descsz;
1404 1725          return (0);
1405 1726  }
1406 1727  
1407 1728  /*
1408 1729   * Copy the section data from one vnode to the section of another vnode.
1409 1730   */
1410 1731  static void
1411 1732  copy_scn(Shdr *src, vnode_t *src_vp, Shdr *dst, vnode_t *dst_vp, Off *doffset,
1412 1733      void *buf, size_t size, cred_t *credp, rlim64_t rlimit)
1413 1734  {
1414 1735          ssize_t resid;
1415 1736          size_t len, n = src->sh_size;
1416 1737          offset_t off = 0;
1417 1738  
1418 1739          while (n != 0) {
1419 1740                  len = MIN(size, n);
1420 1741                  if (vn_rdwr(UIO_READ, src_vp, buf, len, src->sh_offset + off,
1421 1742                      UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid) != 0 ||
1422 1743                      resid >= len ||
1423 1744                      core_write(dst_vp, UIO_SYSSPACE, *doffset + off,
1424 1745                      buf, len - resid, rlimit, credp) != 0) {
1425 1746                          dst->sh_size = 0;
1426 1747                          dst->sh_offset = 0;
1427 1748                          return;
1428 1749                  }
1429 1750  
1430 1751                  ASSERT(n >= len - resid);
1431 1752  
1432 1753                  n -= len - resid;
1433 1754                  off += len - resid;
1434 1755          }
1435 1756  
1436 1757          *doffset += src->sh_size;
1437 1758  }
1438 1759  
1439 1760  #ifdef _ELF32_COMPAT
1440 1761  extern size_t elf_datasz_max;
1441 1762  #else
1442 1763  size_t elf_datasz_max = 1 * 1024 * 1024;
1443 1764  #endif
1444 1765  
1445 1766  /*
1446 1767   * This function processes mappings that correspond to load objects to
1447 1768   * examine their respective sections for elfcore(). It's called once with
1448 1769   * v set to NULL to count the number of sections that we're going to need
1449 1770   * and then again with v set to some allocated buffer that we fill in with
1450 1771   * all the section data.
1451 1772   */
1452 1773  static int
1453 1774  process_scns(core_content_t content, proc_t *p, cred_t *credp, vnode_t *vp,
1454 1775      Shdr *v, int nv, rlim64_t rlimit, Off *doffsetp, int *nshdrsp)
1455 1776  {
1456 1777          vnode_t *lastvp = NULL;
1457 1778          struct seg *seg;
1458 1779          int i, j;
1459 1780          void *data = NULL;
1460 1781          size_t datasz = 0;
1461 1782          shstrtab_t shstrtab;
1462 1783          struct as *as = p->p_as;
1463 1784          int error = 0;
1464 1785  
                   /*
                    * Counting pass (v == NULL): no section header or data is
                    * written and only *nshdrsp is set before returning.
                    * Fill pass (v != NULL): v holds nv entries, section data is
                    * written to vp at *doffsetp (which is advanced), and the
                    * section name string table is built up as we go.
                    * Returns 0, EIO if the section count no longer matches nv,
                    * or the error from the final core_write().
                    */
1465 1786          if (v != NULL)
1466 1787                  shstrtab_init(&shstrtab);
1467 1788  
                   /*
                    * Entry 0 of v is never written by this function; the
                    * sections we emit start at index 1.
                    */
1468 1789          i = 1;
1469 1790          for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1470 1791                  uint_t prot;
1471 1792                  vnode_t *mvp;
1472 1793                  void *tmp = NULL;
1473 1794                  caddr_t saddr = seg->s_base;
1474 1795                  caddr_t naddr;
1475 1796                  caddr_t eaddr;
1476 1797                  size_t segsize;
1477 1798  
1478 1799                  Ehdr ehdr;
1479 1800                  int nshdrs, shstrndx, nphdrs;
1480 1801                  caddr_t shbase;
1481 1802                  ssize_t shsize;
1482 1803                  char *shstrbase;
1483 1804                  ssize_t shstrsize;
1484 1805  
1485 1806                  Shdr *shdr;
1486 1807                  const char *name;
1487 1808                  size_t sz;
1488 1809                  uintptr_t off;
1489 1810  
                           /*
                            * Per-object state: the output index of the CTF section
                            * and of the SHT_SYMTAB section already emitted for this
                            * object (0 means "none yet").
                            */
1490 1811                  int ctf_ndx = 0;
1491 1812                  int symtab_ndx = 0;
1492 1813  
1493 1814                  /*
1494 1815                   * Since we're just looking for text segments of load
1495 1816                   * objects, we only care about the protection bits; we don't
1496 1817                   * care about the actual size of the segment so we use the
1497 1818                   * reserved size. If the segment's size is zero, there's
1498 1819                   * something fishy going on so we ignore this segment.
1499 1820                   */
1500 1821                  if (seg->s_ops != &segvn_ops ||
1501 1822                      SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
1502 1823                      mvp == lastvp || mvp == NULL || mvp->v_type != VREG ||
1503 1824                      (segsize = pr_getsegsize(seg, 1)) == 0)
1504 1825                          continue;
1505 1826  
1506 1827                  eaddr = saddr + segsize;
1507 1828                  prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr);
1508 1829                  pr_getprot_done(&tmp);
1509 1830  
1510 1831                  /*
1511 1832                   * Skip this segment unless the protection bits look like
1512 1833                   * what we'd expect for a text segment.
1513 1834                   */
1514 1835                  if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)
1515 1836                          continue;
1516 1837  
                           /*
                            * Read the ELF header and section headers of the backing
                            * file; if either cannot be read, skip this object.
                            */
1517 1838                  if (getelfhead(mvp, credp, &ehdr, &nshdrs, &shstrndx,
1518 1839                      &nphdrs) != 0 ||
1519 1840                      getelfshdr(mvp, credp, &ehdr, nshdrs, shstrndx,
1520 1841                      &shbase, &shsize, &shstrbase, &shstrsize) != 0)
1521 1842                          continue;
1522 1843  
                           /*
                            * Walk the object's section headers, starting at index 1,
                            * looking for the CTF and .symtab sections by name.
                            */
1523 1844                  off = ehdr.e_shentsize;
1524 1845                  for (j = 1; j < nshdrs; j++, off += ehdr.e_shentsize) {
1525 1846                          Shdr *symtab = NULL, *strtab;
1526 1847  
1527 1848                          shdr = (Shdr *)(shbase + off);
1528 1849  
                                   /* Ignore names that fall outside the string table. */
1529 1850                          if (shdr->sh_name >= shstrsize)
1530 1851                                  continue;
1531 1852  
1532 1853                          name = shstrbase + shdr->sh_name;
1533 1854  
1534 1855                          if (strcmp(name, shstrtab_data[STR_CTF]) == 0) {
1535 1856                                  if ((content & CC_CONTENT_CTF) == 0 ||
1536 1857                                      ctf_ndx != 0)
1537 1858                                          continue;
1538 1859  
                                           /*
                                            * Follow sh_link to the symbol table the
                                            * CTF data refers to, if the index is sane.
                                            */
1539 1860                                  if (shdr->sh_link > 0 &&
1540 1861                                      shdr->sh_link < nshdrs) {
1541 1862                                          symtab = (Shdr *)(shbase +
1542 1863                                              shdr->sh_link * ehdr.e_shentsize);
1543 1864                                  }
1544 1865  
                                           /*
                                            * Only fill in v[i] if doing so leaves the
                                            * final slot (nv - 1) free for the section
                                            * name string table.
                                            */
1545 1866                                  if (v != NULL && i < nv - 1) {
                                                   /*
                                                    * Grow the scratch buffer to fit this
                                                    * section, but never beyond
                                                    * elf_datasz_max.
                                                    */
1546 1867                                          if (shdr->sh_size > datasz &&
1547 1868                                              shdr->sh_size <= elf_datasz_max) {
1548 1869                                                  if (data != NULL)
1549 1870                                                          kmem_free(data, datasz);
1550 1871  
1551 1872                                                  datasz = shdr->sh_size;
1552 1873                                                  data = kmem_alloc(datasz,
1553 1874                                                      KM_SLEEP);
1554 1875                                          }
1555 1876  
1556 1877                                          v[i].sh_name = shstrtab_ndx(&shstrtab,
1557 1878                                              STR_CTF);
1558 1879                                          v[i].sh_addr = (Addr)(uintptr_t)saddr;
1559 1880                                          v[i].sh_type = SHT_PROGBITS;
1560 1881                                          v[i].sh_addralign = 4;
1561 1882                                          *doffsetp = roundup(*doffsetp,
1562 1883                                              v[i].sh_addralign);
1563 1884                                          v[i].sh_offset = *doffsetp;
1564 1885                                          v[i].sh_size = shdr->sh_size;
                                                   /*
                                                    * Link to an SHT_SYMTAB we already
                                                    * emitted, or to the symbol table we
                                                    * are about to emit right after this
                                                    * section.
                                                    */
1565 1886                                          if (symtab == NULL)  {
1566 1887                                                  v[i].sh_link = 0;
1567 1888                                          } else if (symtab->sh_type ==
1568 1889                                              SHT_SYMTAB &&
1569 1890                                              symtab_ndx != 0) {
1570 1891                                                  v[i].sh_link =
1571 1892                                                      symtab_ndx;
1572 1893                                          } else {
1573 1894                                                  v[i].sh_link = i + 1;
1574 1895                                          }
1575 1896  
1576 1897                                          copy_scn(shdr, mvp, &v[i], vp,
1577 1898                                              doffsetp, data, datasz, credp,
1578 1899                                              rlimit);
1579 1900                                  }
1580 1901  
1581 1902                                  ctf_ndx = i++;
1582 1903  
1583 1904                                  /*
1584 1905                                   * We've already dumped the symtab.
1585 1906                                   */
1586 1907                                  if (symtab != NULL &&
1587 1908                                      symtab->sh_type == SHT_SYMTAB &&
1588 1909                                      symtab_ndx != 0)
1589 1910                                          continue;
1590 1911  
1591 1912                          } else if (strcmp(name,
1592 1913                              shstrtab_data[STR_SYMTAB]) == 0) {
                                           /*
                                            * Skip the section if an SHT_SYMTAB has
                                            * already been emitted for this object,
                                            * either by name or through a CTF
                                            * section's sh_link. symtab itself is
                                            * reset to NULL on every iteration, so the
                                            * per-object symtab_ndx is what carries
                                            * that state.
                                            */
1593 1914                                  if ((content & CC_CONTENT_SYMTAB) == 0 ||
1594 1915                                      symtab_ndx != 0)
1595 1916                                          continue;
1596 1917  
1597 1918                                  symtab = shdr;
1598 1919                          }
1599 1920  
1600 1921                          if (symtab != NULL) {
                                           /*
                                            * The symbol table must be a dynsym or
                                            * symtab whose sh_link names a valid string
                                            * table section.
                                            */
1601 1922                                  if ((symtab->sh_type != SHT_DYNSYM &&
1602 1923                                      symtab->sh_type != SHT_SYMTAB) ||
1603 1924                                      symtab->sh_link == 0 ||
1604 1925                                      symtab->sh_link >= nshdrs)
1605 1926                                          continue;
1606 1927  
1607 1928                                  strtab = (Shdr *)(shbase +
1608 1929                                      symtab->sh_link * ehdr.e_shentsize);
1609 1930  
1610 1931                                  if (strtab->sh_type != SHT_STRTAB)
1611 1932                                          continue;
1612 1933  
                                           /*
                                            * Two entries are needed here (symbol table
                                            * and its string table), again leaving the
                                            * final slot free.
                                            */
1613 1934                                  if (v != NULL && i < nv - 2) {
1614 1935                                          sz = MAX(symtab->sh_size,
1615 1936                                              strtab->sh_size);
1616 1937                                          if (sz > datasz &&
1617 1938                                              sz <= elf_datasz_max) {
1618 1939                                                  if (data != NULL)
1619 1940                                                          kmem_free(data, datasz);
1620 1941  
1621 1942                                                  datasz = sz;
1622 1943                                                  data = kmem_alloc(datasz,
1623 1944                                                      KM_SLEEP);
1624 1945                                          }
1625 1946  
1626 1947                                          if (symtab->sh_type == SHT_DYNSYM) {
1627 1948                                                  v[i].sh_name = shstrtab_ndx(
1628 1949                                                      &shstrtab, STR_DYNSYM);
1629 1950                                                  v[i + 1].sh_name = shstrtab_ndx(
1630 1951                                                      &shstrtab, STR_DYNSTR);
1631 1952                                          } else {
1632 1953                                                  v[i].sh_name = shstrtab_ndx(
1633 1954                                                      &shstrtab, STR_SYMTAB);
1634 1955                                                  v[i + 1].sh_name = shstrtab_ndx(
1635 1956                                                      &shstrtab, STR_STRTAB);
1636 1957                                          }
1637 1958  
1638 1959                                          v[i].sh_type = symtab->sh_type;
1639 1960                                          v[i].sh_addr = symtab->sh_addr;
1640 1961                                          if (ehdr.e_type == ET_DYN ||
1641 1962                                              v[i].sh_addr == 0)
1642 1963                                                  v[i].sh_addr +=
1643 1964                                                      (Addr)(uintptr_t)saddr;
1644 1965                                          v[i].sh_addralign =
1645 1966                                              symtab->sh_addralign;
1646 1967                                          *doffsetp = roundup(*doffsetp,
1647 1968                                              v[i].sh_addralign);
1648 1969                                          v[i].sh_offset = *doffsetp;
1649 1970                                          v[i].sh_size = symtab->sh_size;
1650 1971                                          v[i].sh_link = i + 1;
1651 1972                                          v[i].sh_entsize = symtab->sh_entsize;
1652 1973                                          v[i].sh_info = symtab->sh_info;
1653 1974  
1654 1975                                          copy_scn(symtab, mvp, &v[i], vp,
1655 1976                                              doffsetp, data, datasz, credp,
1656 1977                                              rlimit);
1657 1978  
1658 1979                                          v[i + 1].sh_type = SHT_STRTAB;
1659 1980                                          v[i + 1].sh_flags = SHF_STRINGS;
1660 1981                                          v[i + 1].sh_addr = symtab->sh_addr;
1661 1982                                          if (ehdr.e_type == ET_DYN ||
1662 1983                                              v[i + 1].sh_addr == 0)
1663 1984                                                  v[i + 1].sh_addr +=
1664 1985                                                      (Addr)(uintptr_t)saddr;
1665 1986                                          v[i + 1].sh_addralign =
1666 1987                                              strtab->sh_addralign;
1667 1988                                          *doffsetp = roundup(*doffsetp,
1668 1989                                              v[i + 1].sh_addralign);
1669 1990                                          v[i + 1].sh_offset = *doffsetp;
1670 1991                                          v[i + 1].sh_size = strtab->sh_size;
1671 1992  
1672 1993                                          copy_scn(strtab, mvp, &v[i + 1], vp,
1673 1994                                              doffsetp, data, datasz, credp,
1674 1995                                              rlimit);
1675 1996                                  }
1676 1997  
                                           /*
                                            * Account for both entries in either pass so
                                            * the counting and fill passes stay in step.
                                            */
1677 1998                                  if (symtab->sh_type == SHT_SYMTAB)
1678 1999                                          symtab_ndx = i;
1679 2000                                  i += 2;
1680 2001                          }
1681 2002                  }
1682 2003  
                           /* Release the buffers handed back by getelfshdr(). */
1683 2004                  kmem_free(shstrbase, shstrsize);
1684 2005                  kmem_free(shbase, shsize);
1685 2006  
                           /*
                            * Remember this vnode so that a following segment backed
                            * by the same file is not processed again.
                            */
1686 2007                  lastvp = mvp;
1687 2008          }
1688 2009  
                   /*
                    * Counting pass: report zero if nothing was found; otherwise
                    * the total is the i entries at indices 0 through i - 1 plus
                    * one more for the section name string table.
                    */
1689 2010          if (v == NULL) {
1690 2011                  if (i == 1)
1691 2012                          *nshdrsp = 0;
1692 2013                  else
1693 2014                          *nshdrsp = i + 1;
1694 2015                  goto done;
1695 2016          }
1696 2017  
                   /*
                    * Fill pass: the string table must land in the last slot. If it
                    * does not, the sections seen differ from what was counted.
                    */
1697 2018          if (i != nv - 1) {
1698 2019                  cmn_err(CE_WARN, "elfcore: core dump failed for "
1699 2020                      "process %d; address space is changing", p->p_pid);
1700 2021                  error = EIO;
1701 2022                  goto done;
1702 2023          }
1703 2024  
                   /* Emit the section name string table as the final section. */
1704 2025          v[i].sh_name = shstrtab_ndx(&shstrtab, STR_SHSTRTAB);
1705 2026          v[i].sh_size = shstrtab_size(&shstrtab);
1706 2027          v[i].sh_addralign = 1;
1707 2028          *doffsetp = roundup(*doffsetp, v[i].sh_addralign);
1708 2029          v[i].sh_offset = *doffsetp;
1709 2030          v[i].sh_flags = SHF_STRINGS;
1710 2031          v[i].sh_type = SHT_STRTAB;
1711 2032  
1712 2033          if (v[i].sh_size > datasz) {
1713 2034                  if (data != NULL)
1714 2035                          kmem_free(data, datasz);
1715 2036  
1716 2037                  datasz = v[i].sh_size;
1717 2038                  data = kmem_alloc(datasz,
1718 2039                      KM_SLEEP);
1719 2040          }
1720 2041  
1721 2042          shstrtab_dump(&shstrtab, data);
1722 2043  
1723 2044          if ((error = core_write(vp, UIO_SYSSPACE, *doffsetp,
1724 2045              data, v[i].sh_size, rlimit, credp)) != 0)
1725 2046                  goto done;
1726 2047  
1727 2048          *doffsetp += v[i].sh_size;
1728 2049  
1729 2050  done:
                   /* The scratch buffer is shared by all sections; free it once. */
1730 2051          if (data != NULL)
1731 2052                  kmem_free(data, datasz);
1732 2053  
1733 2054          return (error);
1734 2055  }
1735 2056  
1736 2057  int
1737 2058  elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig,
1738 2059      core_content_t content)
1739 2060  {
1740 2061          offset_t poffset, soffset;
1741 2062          Off doffset;
1742 2063          int error, i, nphdrs, nshdrs;
1743 2064          int overflow = 0;
1744 2065          struct seg *seg;
1745 2066          struct as *as = p->p_as;
1746 2067          union {
1747 2068                  Ehdr ehdr;
1748 2069                  Phdr phdr[1];
1749 2070                  Shdr shdr[1];
1750 2071          } *bigwad;
1751 2072          size_t bigsize;
1752 2073          size_t phdrsz, shdrsz;
1753 2074          Ehdr *ehdr;
1754 2075          Phdr *v;
1755 2076          caddr_t brkbase;
1756 2077          size_t brksize;
1757 2078          caddr_t stkbase;
1758 2079          size_t stksize;
1759 2080          int ntries = 0;
1760 2081          klwp_t *lwp = ttolwp(curthread);
1761 2082  
1762 2083  top:
1763 2084          /*
1764 2085           * Make sure we have everything we need (registers, etc.).
1765 2086           * All other lwps have already stopped and are in an orderly state.
1766 2087           */
1767 2088          ASSERT(p == ttoproc(curthread));
1768 2089          prstop(0, 0);
1769 2090  
1770 2091          AS_LOCK_ENTER(as, RW_WRITER);
1771 2092          nphdrs = prnsegs(as, 0) + 2;            /* two CORE note sections */
1772 2093  
1773 2094          /*
1774 2095           * Count the number of section headers we're going to need.
1775 2096           */
1776 2097          nshdrs = 0;
1777 2098          if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) {
1778 2099                  (void) process_scns(content, p, credp, NULL, NULL, NULL, 0,
1779 2100                      NULL, &nshdrs);
1780 2101          }
1781 2102          AS_LOCK_EXIT(as);
1782 2103  
1783 2104          ASSERT(nshdrs == 0 || nshdrs > 1);
1784 2105  
1785 2106          /*
1786 2107           * The core file contents may require zero section headers, but if
1787 2108           * we overflow the 16 bits allotted to the program header count in
1788 2109           * the ELF header, we'll need the section header at index zero.
1789 2110           */
1790 2111          if (nshdrs == 0 && nphdrs >= PN_XNUM)
1791 2112                  nshdrs = 1;
1792 2113  
1793 2114          phdrsz = nphdrs * sizeof (Phdr);
1794 2115          shdrsz = nshdrs * sizeof (Shdr);
1795 2116  
1796 2117          bigsize = MAX(sizeof (*bigwad), MAX(phdrsz, shdrsz));
1797 2118          bigwad = kmem_alloc(bigsize, KM_SLEEP);
1798 2119  
1799 2120          ehdr = &bigwad->ehdr;
1800 2121          bzero(ehdr, sizeof (*ehdr));
1801 2122  
1802 2123          ehdr->e_ident[EI_MAG0] = ELFMAG0;
1803 2124          ehdr->e_ident[EI_MAG1] = ELFMAG1;
1804 2125          ehdr->e_ident[EI_MAG2] = ELFMAG2;
1805 2126          ehdr->e_ident[EI_MAG3] = ELFMAG3;
1806 2127          ehdr->e_ident[EI_CLASS] = ELFCLASS;
1807 2128          ehdr->e_type = ET_CORE;
1808 2129  
1809 2130  #if !defined(_LP64) || defined(_ELF32_COMPAT)
1810 2131  
1811 2132  #if defined(__sparc)
1812 2133          ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
1813 2134          ehdr->e_machine = EM_SPARC;
1814 2135  #elif defined(__i386) || defined(__i386_COMPAT)
1815 2136          ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
1816 2137          ehdr->e_machine = EM_386;
1817 2138  #else
1818 2139  #error "no recognized machine type is defined"
1819 2140  #endif
1820 2141  
1821 2142  #else   /* !defined(_LP64) || defined(_ELF32_COMPAT) */
1822 2143  
1823 2144  #if defined(__sparc)
1824 2145          ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
1825 2146          ehdr->e_machine = EM_SPARCV9;
1826 2147  #elif defined(__amd64)
1827 2148          ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
1828 2149          ehdr->e_machine = EM_AMD64;
1829 2150  #else
1830 2151  #error "no recognized 64-bit machine type is defined"
1831 2152  #endif
1832 2153  
1833 2154  #endif  /* !defined(_LP64) || defined(_ELF32_COMPAT) */
1834 2155  
1835 2156          /*
1836 2157           * If the count of program headers or section headers or the index
1837 2158           * of the section string table can't fit in the mere 16 bits
1838 2159           * shortsightedly allotted to them in the ELF header, we use the
1839 2160           * extended formats and put the real values in the section header
1840 2161           * as index 0.
1841 2162           */
1842 2163          ehdr->e_version = EV_CURRENT;
1843 2164          ehdr->e_ehsize = sizeof (Ehdr);
1844 2165  
1845 2166          if (nphdrs >= PN_XNUM)
1846 2167                  ehdr->e_phnum = PN_XNUM;
1847 2168          else
1848 2169                  ehdr->e_phnum = (unsigned short)nphdrs;
1849 2170  
1850 2171          ehdr->e_phoff = sizeof (Ehdr);
1851 2172          ehdr->e_phentsize = sizeof (Phdr);
1852 2173  
1853 2174          if (nshdrs > 0) {
1854 2175                  if (nshdrs >= SHN_LORESERVE)
1855 2176                          ehdr->e_shnum = 0;
1856 2177                  else
1857 2178                          ehdr->e_shnum = (unsigned short)nshdrs;
1858 2179  
1859 2180                  if (nshdrs - 1 >= SHN_LORESERVE)
1860 2181                          ehdr->e_shstrndx = SHN_XINDEX;
1861 2182                  else
1862 2183                          ehdr->e_shstrndx = (unsigned short)(nshdrs - 1);
1863 2184  
1864 2185                  ehdr->e_shoff = ehdr->e_phoff + ehdr->e_phentsize * nphdrs;
1865 2186                  ehdr->e_shentsize = sizeof (Shdr);
1866 2187          }
1867 2188  
1868 2189          if (error = core_write(vp, UIO_SYSSPACE, (offset_t)0, ehdr,
1869 2190              sizeof (Ehdr), rlimit, credp))
1870 2191                  goto done;
1871 2192  
1872 2193          poffset = sizeof (Ehdr);
1873 2194          soffset = sizeof (Ehdr) + phdrsz;
1874 2195          doffset = sizeof (Ehdr) + phdrsz + shdrsz;
1875 2196  
1876 2197          v = &bigwad->phdr[0];
1877 2198          bzero(v, phdrsz);
1878 2199  
1879 2200          setup_old_note_header(&v[0], p);
1880 2201          v[0].p_offset = doffset = roundup(doffset, sizeof (Word));
1881 2202          doffset += v[0].p_filesz;
1882 2203  
1883 2204          setup_note_header(&v[1], p);
1884 2205          v[1].p_offset = doffset = roundup(doffset, sizeof (Word));
1885 2206          doffset += v[1].p_filesz;
1886 2207  
1887 2208          mutex_enter(&p->p_lock);
1888 2209  
1889 2210          brkbase = p->p_brkbase;
1890 2211          brksize = p->p_brksize;
1891 2212  
1892 2213          stkbase = p->p_usrstack - p->p_stksize;
1893 2214          stksize = p->p_stksize;
1894 2215  
1895 2216          mutex_exit(&p->p_lock);
1896 2217  
1897 2218          AS_LOCK_ENTER(as, RW_WRITER);
1898 2219          i = 2;
1899 2220          for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1900 2221                  caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1901 2222                  caddr_t saddr, naddr;
1902 2223                  void *tmp = NULL;
1903 2224                  extern struct seg_ops segspt_shmops;
1904 2225  
1905 2226                  for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1906 2227                          uint_t prot;
1907 2228                          size_t size;
1908 2229                          int type;
1909 2230                          vnode_t *mvp;
1910 2231  
1911 2232                          prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1912 2233                          prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
1913 2234                          if ((size = (size_t)(naddr - saddr)) == 0)
1914 2235                                  continue;
1915 2236                          if (i == nphdrs) {
1916 2237                                  overflow++;
1917 2238                                  continue;
1918 2239                          }
1919 2240                          v[i].p_type = PT_LOAD;
1920 2241                          v[i].p_vaddr = (Addr)(uintptr_t)saddr;
1921 2242                          v[i].p_memsz = size;
1922 2243                          if (prot & PROT_READ)
1923 2244                                  v[i].p_flags |= PF_R;
1924 2245                          if (prot & PROT_WRITE)
1925 2246                                  v[i].p_flags |= PF_W;
1926 2247                          if (prot & PROT_EXEC)
1927 2248                                  v[i].p_flags |= PF_X;
1928 2249  
1929 2250                          /*
1930 2251                           * Figure out which mappings to include in the core.
1931 2252                           */
1932 2253                          type = SEGOP_GETTYPE(seg, saddr);
1933 2254  
1934 2255                          if (saddr == stkbase && size == stksize) {
1935 2256                                  if (!(content & CC_CONTENT_STACK))
1936 2257                                          goto exclude;
1937 2258  
1938 2259                          } else if (saddr == brkbase && size == brksize) {
1939 2260                                  if (!(content & CC_CONTENT_HEAP))
1940 2261                                          goto exclude;
1941 2262  
1942 2263                          } else if (seg->s_ops == &segspt_shmops) {
1943 2264                                  if (type & MAP_NORESERVE) {
1944 2265                                          if (!(content & CC_CONTENT_DISM))
1945 2266                                                  goto exclude;
1946 2267                                  } else {
1947 2268                                          if (!(content & CC_CONTENT_ISM))
1948 2269                                                  goto exclude;
1949 2270                                  }
1950 2271  
1951 2272                          } else if (seg->s_ops != &segvn_ops) {
1952 2273                                  goto exclude;
1953 2274  
1954 2275                          } else if (type & MAP_SHARED) {
1955 2276                                  if (shmgetid(p, saddr) != SHMID_NONE) {
1956 2277                                          if (!(content & CC_CONTENT_SHM))
1957 2278                                                  goto exclude;
1958 2279  
1959 2280                                  } else if (SEGOP_GETVP(seg, seg->s_base,
1960 2281                                      &mvp) != 0 || mvp == NULL ||
1961 2282                                      mvp->v_type != VREG) {
1962 2283                                          if (!(content & CC_CONTENT_SHANON))
1963 2284                                                  goto exclude;
1964 2285  
1965 2286                                  } else {
1966 2287                                          if (!(content & CC_CONTENT_SHFILE))
1967 2288                                                  goto exclude;
1968 2289                                  }
1969 2290  
1970 2291                          } else if (SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
1971 2292                              mvp == NULL || mvp->v_type != VREG) {
1972 2293                                  if (!(content & CC_CONTENT_ANON))
1973 2294                                          goto exclude;
1974 2295  
1975 2296                          } else if (prot == (PROT_READ | PROT_EXEC)) {
1976 2297                                  if (!(content & CC_CONTENT_TEXT))
1977 2298                                          goto exclude;
1978 2299  
1979 2300                          } else if (prot == PROT_READ) {
1980 2301                                  if (!(content & CC_CONTENT_RODATA))
1981 2302                                          goto exclude;
1982 2303  
1983 2304                          } else {
1984 2305                                  if (!(content & CC_CONTENT_DATA))
1985 2306                                          goto exclude;
1986 2307                          }
1987 2308  
1988 2309                          doffset = roundup(doffset, sizeof (Word));
1989 2310                          v[i].p_offset = doffset;
1990 2311                          v[i].p_filesz = size;
1991 2312                          doffset += size;
1992 2313  exclude:
1993 2314                          i++;
1994 2315                  }
1995 2316                  ASSERT(tmp == NULL);
1996 2317          }
1997 2318          AS_LOCK_EXIT(as);
1998 2319  
1999 2320          if (overflow || i != nphdrs) {
2000 2321                  if (ntries++ == 0) {
2001 2322                          kmem_free(bigwad, bigsize);
2002 2323                          overflow = 0;
2003 2324                          goto top;
2004 2325                  }
2005 2326                  cmn_err(CE_WARN, "elfcore: core dump failed for "
2006 2327                      "process %d; address space is changing", p->p_pid);
2007 2328                  error = EIO;
2008 2329                  goto done;
2009 2330          }
2010 2331  
2011 2332          if ((error = core_write(vp, UIO_SYSSPACE, poffset,
2012 2333              v, phdrsz, rlimit, credp)) != 0)
2013 2334                  goto done;
2014 2335  
2015 2336          if ((error = write_old_elfnotes(p, sig, vp, v[0].p_offset, rlimit,
2016 2337              credp)) != 0)
2017 2338                  goto done;
2018 2339  
2019 2340          if ((error = write_elfnotes(p, sig, vp, v[1].p_offset, rlimit,
2020 2341              credp, content)) != 0)
2021 2342                  goto done;
2022 2343  
2023 2344          for (i = 2; i < nphdrs; i++) {
2024 2345                  prkillinfo_t killinfo;
2025 2346                  sigqueue_t *sq;
2026 2347                  int sig, j;
2027 2348  
2028 2349                  if (v[i].p_filesz == 0)
2029 2350                          continue;
2030 2351  
2031 2352                  /*
2032 2353                   * If dumping out this segment fails, rather than failing
2033 2354                   * the core dump entirely, we reset the size of the mapping
2034 2355                   * to zero to indicate that the data is absent from the core
2035 2356                   * file and OR in the PF_SUNW_FAILURE flag to differentiate
2036 2357                   * this from mappings that were excluded due to the core file
2037 2358                   * content settings.
2038 2359                   */
2039 2360                  if ((error = core_seg(p, vp, v[i].p_offset,
2040 2361                      (caddr_t)(uintptr_t)v[i].p_vaddr, v[i].p_filesz,
2041 2362                      rlimit, credp)) == 0) {
2042 2363                          continue;
2043 2364                  }
2044 2365  
2045 2366                  if ((sig = lwp->lwp_cursig) == 0) {
2046 2367                          /*
2047 2368                           * We failed due to something other than a signal.
2048 2369                           * Since the space reserved for the segment is now
2049 2370                           * unused, we stash the errno in the first four
2050 2371                           * bytes. This undocumented interface will let us
2051 2372                           * understand the nature of the failure.
2052 2373                           */
2053 2374                          (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
2054 2375                              &error, sizeof (error), rlimit, credp);
2055 2376  
2056 2377                          v[i].p_filesz = 0;
2057 2378                          v[i].p_flags |= PF_SUNW_FAILURE;
2058 2379                          if ((error = core_write(vp, UIO_SYSSPACE,
2059 2380                              poffset + sizeof (v[i]) * i, &v[i], sizeof (v[i]),
2060 2381                              rlimit, credp)) != 0)
2061 2382                                  goto done;
2062 2383  
2063 2384                          continue;
2064 2385                  }
2065 2386  
2066 2387                  /*
2067 2388                   * We took a signal.  We want to abort the dump entirely, but
2068 2389                   * we also want to indicate what failed and why.  We therefore
2069 2390                   * use the space reserved for the first failing segment to
2070 2391                   * write our error (which, for purposes of compatibility with
2071 2392                   * older core dump readers, we set to EINTR) followed by any
2072 2393                   * siginfo associated with the signal.
2073 2394                   */
2074 2395                  bzero(&killinfo, sizeof (killinfo));
2075 2396                  killinfo.prk_error = EINTR;
2076 2397  
2077 2398                  sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo;
2078 2399  
2079 2400                  if (sq != NULL) {
2080 2401                          bcopy(&sq->sq_info, &killinfo.prk_info,
2081 2402                              sizeof (sq->sq_info));
2082 2403                  } else {
2083 2404                          killinfo.prk_info.si_signo = lwp->lwp_cursig;
2084 2405                          killinfo.prk_info.si_code = SI_NOINFO;
2085 2406                  }
2086 2407  
2087 2408  #if (defined(_SYSCALL32_IMPL) || defined(_LP64))
2088 2409                  /*
2089 2410                   * If this is a 32-bit process, we need to translate from the
2090 2411                   * native siginfo to the 32-bit variant.  (Core readers must
2091 2412                   * always have the same data model as their target or must
2092 2413                   * be aware of -- and compensate for -- data model differences.)
2093 2414                   */
2094 2415                  if (curproc->p_model == DATAMODEL_ILP32) {
2095 2416                          siginfo32_t si32;
2096 2417  
2097 2418                          siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32);
2098 2419                          bcopy(&si32, &killinfo.prk_info, sizeof (si32));
2099 2420                  }
2100 2421  #endif
2101 2422  
2102 2423                  (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
2103 2424                      &killinfo, sizeof (killinfo), rlimit, credp);
2104 2425  
2105 2426                  /*
2106 2427                   * For the segment on which we took the signal, indicate that
2107 2428                   * its data now refers to a siginfo.
2108 2429                   */
2109 2430                  v[i].p_filesz = 0;
2110 2431                  v[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED |
2111 2432                      PF_SUNW_SIGINFO;
2112 2433  
2113 2434                  /*
2114 2435                   * And for every other segment, indicate that its absence
2115 2436                   * is due to a signal.
2116 2437                   */
2117 2438                  for (j = i + 1; j < nphdrs; j++) {
2118 2439                          v[j].p_filesz = 0;
2119 2440                          v[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED;
2120 2441                  }
2121 2442  
2122 2443                  /*
2123 2444                   * Finally, write out our modified program headers.
2124 2445                   */
2125 2446                  if ((error = core_write(vp, UIO_SYSSPACE,
2126 2447                      poffset + sizeof (v[i]) * i, &v[i],
2127 2448                      sizeof (v[i]) * (nphdrs - i), rlimit, credp)) != 0)
2128 2449                          goto done;
2129 2450  
2130 2451                  break;
2131 2452          }
2132 2453  
2133 2454          if (nshdrs > 0) {
2134 2455                  bzero(&bigwad->shdr[0], shdrsz);
2135 2456  
2136 2457                  if (nshdrs >= SHN_LORESERVE)
2137 2458                          bigwad->shdr[0].sh_size = nshdrs;
2138 2459  
2139 2460                  if (nshdrs - 1 >= SHN_LORESERVE)
2140 2461                          bigwad->shdr[0].sh_link = nshdrs - 1;
2141 2462  
2142 2463                  if (nphdrs >= PN_XNUM)
2143 2464                          bigwad->shdr[0].sh_info = nphdrs;
2144 2465  
2145 2466                  if (nshdrs > 1) {
2146 2467                          AS_LOCK_ENTER(as, RW_WRITER);
2147 2468                          if ((error = process_scns(content, p, credp, vp,
2148 2469                              &bigwad->shdr[0], nshdrs, rlimit, &doffset,
2149 2470                              NULL)) != 0) {
2150 2471                                  AS_LOCK_EXIT(as);
2151 2472                                  goto done;
2152 2473                          }
2153 2474                          AS_LOCK_EXIT(as);
2154 2475                  }
2155 2476  
2156 2477                  if ((error = core_write(vp, UIO_SYSSPACE, soffset,
2157 2478                      &bigwad->shdr[0], shdrsz, rlimit, credp)) != 0)
2158 2479                          goto done;
2159 2480          }
2160 2481  
2161 2482  done:
2162 2483          kmem_free(bigwad, bigsize);
2163 2484          return (error);
2164 2485  }
2165 2486  
2166 2487  #ifndef _ELF32_COMPAT
2167 2488  
/*
 * Exec switch entry for the native ELF class.  The magic string is
 * elf64magicstr when _LP64 is defined and elf32magicstr otherwise; the
 * handlers are elfexec and elfcore.
 * NOTE(review): the literal 0 and 5 presumably give the offset and length
 * of the magic string -- confirm against the struct execsw definition.
 */
2168 2489  static struct execsw esw = {
2169 2490  #ifdef  _LP64
2170 2491          elf64magicstr,
2171 2492  #else   /* _LP64 */
2172 2493          elf32magicstr,
2173 2494  #endif  /* _LP64 */
2174 2495          0,
2175 2496          5,
2176 2497          elfexec,
2177 2498          elfcore
  
    | 
      ↓ open down ↓ | 
    828 lines elided | 
    
      ↑ open up ↑ | 
  
2178 2499  };
2179 2500  
/*
 * Exec-module linkage record: pairs mod_execops and the description string
 * "exec module for elf" with the esw exec switch entry.
 */
2180 2501  static struct modlexec modlexec = {
2181 2502          &mod_execops, "exec module for elf", &esw
2182 2503  };
2183 2504  
2184 2505  #ifdef  _LP64
/*
 * Prototype for elf32exec, the exec handler placed in esw32.  This listing
 * shows the final parameter changing from `int brand_action' (removed
 * line, marked `-') to `int *brand_action' (added line, marked `+').
 */
2185 2506  extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
2186 2507                          intpdata_t *idatap, int level, long *execsz,
2187 2508                          int setid, caddr_t exec_file, cred_t *cred,
2188      -                        int brand_action);
     2509 +                        int *brand_action);
/*
 * Prototype for elf32core, the core-dump handler placed in esw32.
 */
2189 2510  extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
2190 2511                          rlim64_t rlimit, int sig, core_content_t content);
2191 2512  
/*
 * Exec switch entry for 32-bit ELF objects, compiled only when _LP64 is
 * defined.  It uses elf32magicstr with the elf32exec and elf32core
 * handlers, and the same 0 and 5 literals as esw.
 */
2192 2513  static struct execsw esw32 = {
2193 2514          elf32magicstr,
2194 2515          0,
2195 2516          5,
2196 2517          elf32exec,
2197 2518          elf32core
2198 2519  };
2199 2520  
/*
 * Exec-module linkage record for the 32-bit handlers: pairs mod_execops
 * and the description string with esw32.  Compiled only when _LP64 is
 * defined.
 */
2200 2521  static struct modlexec modlexec32 = {
2201 2522          &mod_execops, "32-bit exec module for elf", &esw32
2202 2523  };
2203 2524  #endif  /* _LP64 */
2204 2525  
/*
 * Linkage handed to mod_install(), mod_remove() and mod_info() below.
 * Holds MODREV_1, then modlexec, then modlexec32 when _LP64 is defined,
 * and ends with a NULL entry.
 */
2205 2526  static struct modlinkage modlinkage = {
2206 2527          MODREV_1,
2207 2528          (void *)&modlexec,
2208 2529  #ifdef  _LP64
2209 2530          (void *)&modlexec32,
2210 2531  #endif  /* _LP64 */
2211 2532          NULL
2212 2533  };
2213 2534  
/*
 * Install this module's linkage (modlinkage) through mod_install() and
 * return that call's result unchanged.
 */
2214 2535  int
2215 2536  _init(void)
2216 2537  {
2217 2538          return (mod_install(&modlinkage));
2218 2539  }
2219 2540  
/*
 * Remove this module's linkage (modlinkage) through mod_remove() and
 * return that call's result unchanged.
 */
2220 2541  int
2221 2542  _fini(void)
2222 2543  {
2223 2544          return (mod_remove(&modlinkage));
2224 2545  }
2225 2546  
/*
 * Pass modlinkage and the caller-supplied modinfop to mod_info() and
 * return that call's result unchanged.
 */
2226 2547  int
2227 2548  _info(struct modinfo *modinfop)
2228 2549  {
2229 2550          return (mod_info(&modlinkage, modinfop));
2230 2551  }
2231 2552  
2232 2553  #endif  /* !_ELF32_COMPAT */
  
    | 
      ↓ open down ↓ | 
    34 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX