Print this page
    
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/exec/elf/elf.c
          +++ new/usr/src/uts/common/exec/elf/elf.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  27   27  /*        All Rights Reserved   */
  28   28  /*
  29   29   * Copyright 2016 Joyent, Inc.
  30   30   */
  31   31  
  32   32  #include <sys/types.h>
  33   33  #include <sys/param.h>
  34   34  #include <sys/thread.h>
  35   35  #include <sys/sysmacros.h>
  36   36  #include <sys/signal.h>
  37   37  #include <sys/cred.h>
  38   38  #include <sys/user.h>
  39   39  #include <sys/errno.h>
  40   40  #include <sys/vnode.h>
  41   41  #include <sys/mman.h>
  42   42  #include <sys/kmem.h>
  43   43  #include <sys/proc.h>
  44   44  #include <sys/pathname.h>
  45   45  #include <sys/cmn_err.h>
  46   46  #include <sys/systm.h>
  47   47  #include <sys/elf.h>
  48   48  #include <sys/vmsystm.h>
  49   49  #include <sys/debug.h>
  50   50  #include <sys/auxv.h>
  51   51  #include <sys/exec.h>
  52   52  #include <sys/prsystm.h>
  53   53  #include <vm/as.h>
  54   54  #include <vm/rm.h>
  55   55  #include <vm/seg.h>
  56   56  #include <vm/seg_vn.h>
  57   57  #include <sys/modctl.h>
  58   58  #include <sys/systeminfo.h>
  59   59  #include <sys/vmparam.h>
  60   60  #include <sys/machelf.h>
  61   61  #include <sys/shm_impl.h>
  62   62  #include <sys/archsystm.h>
  63   63  #include <sys/fasttrap.h>
  64   64  #include <sys/brand.h>
  65   65  #include "elf_impl.h"
  66   66  #include <sys/sdt.h>
  67   67  #include <sys/siginfo.h>
  68   68  
  69   69  #if defined(__x86)
  70   70  #include <sys/comm_page_util.h>
  71   71  #endif /* defined(__x86) */
  72   72  
  73   73  
  74   74  extern int at_flags;
  75   75  
  76   76  #define ORIGIN_STR      "ORIGIN"
  77   77  #define ORIGIN_STR_SIZE 6
  78   78  
  79   79  static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
  80   80  static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
  81   81      ssize_t *);
  82   82  static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
  83   83      ssize_t *, caddr_t *, ssize_t *);
  84   84  static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
  85   85  static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
  86   86      Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
  87   87      caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
  88   88  
  89   89  typedef enum {
  90   90          STR_CTF,
  91   91          STR_SYMTAB,
  92   92          STR_DYNSYM,
  93   93          STR_STRTAB,
  94   94          STR_DYNSTR,
  95   95          STR_SHSTRTAB,
  96   96          STR_NUM
  97   97  } shstrtype_t;
  98   98  
  99   99  static const char *shstrtab_data[] = {
 100  100          ".SUNW_ctf",
 101  101          ".symtab",
 102  102          ".dynsym",
 103  103          ".strtab",
 104  104          ".dynstr",
 105  105          ".shstrtab"
 106  106  };
 107  107  
 108  108  typedef struct shstrtab {
 109  109          int     sst_ndx[STR_NUM];
 110  110          int     sst_cur;
 111  111  } shstrtab_t;
 112  112  
 113  113  static void
 114  114  shstrtab_init(shstrtab_t *s)
 115  115  {
 116  116          bzero(&s->sst_ndx, sizeof (s->sst_ndx));
 117  117          s->sst_cur = 1;
 118  118  }
 119  119  
 120  120  static int
 121  121  shstrtab_ndx(shstrtab_t *s, shstrtype_t type)
 122  122  {
 123  123          int ret;
 124  124  
 125  125          if ((ret = s->sst_ndx[type]) != 0)
 126  126                  return (ret);
 127  127  
 128  128          ret = s->sst_ndx[type] = s->sst_cur;
 129  129          s->sst_cur += strlen(shstrtab_data[type]) + 1;
 130  130  
 131  131          return (ret);
 132  132  }
 133  133  
 134  134  static size_t
 135  135  shstrtab_size(const shstrtab_t *s)
 136  136  {
 137  137          return (s->sst_cur);
 138  138  }
 139  139  
 140  140  static void
 141  141  shstrtab_dump(const shstrtab_t *s, char *buf)
 142  142  {
 143  143          int i, ndx;
 144  144  
 145  145          *buf = '\0';
 146  146          for (i = 0; i < STR_NUM; i++) {
 147  147                  if ((ndx = s->sst_ndx[i]) != 0)
 148  148                          (void) strcpy(buf + ndx, shstrtab_data[i]);
 149  149          }
 150  150  }
 151  151  
 152  152  static int
 153  153  dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
 154  154  {
 155  155          ASSERT(phdrp->p_type == PT_SUNWDTRACE);
 156  156  
 157  157          /*
 158  158           * See the comment in fasttrap.h for information on how to safely
 159  159           * update this program header.
 160  160           */
 161  161          if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
 162  162              (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
 163  163                  return (-1);
 164  164  
 165  165          args->thrptr = phdrp->p_vaddr + base;
 166  166  
 167  167          return (0);
 168  168  }
 169  169  
 170  170  /*
 171  171   * Map in the executable pointed to by vp. Returns 0 on success.  Note that
 172  172   * this function currently has the maximum number of arguments allowed by
 173  173   * modstubs on x86 (MAXNARG)!  Do _not_ add to this function signature without
 174  174   * adding to MAXNARG.  (Better yet, do not add to this monster of a function
 175  175   * signature!)
 176  176   */
 177  177  int
 178  178  mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
 179  179      intptr_t *voffset, caddr_t exec_file, char **interpp, caddr_t *bssbase,
 180  180      caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap, uintptr_t *minaddrp)
 181  181  {
 182  182          size_t          len;
 183  183          struct vattr    vat;
 184  184          caddr_t         phdrbase = NULL;
 185  185          ssize_t         phdrsize;
 186  186          int             nshdrs, shstrndx, nphdrs;
 187  187          int             error = 0;
 188  188          Phdr            *uphdr = NULL;
 189  189          Phdr            *junk = NULL;
 190  190          Phdr            *dynphdr = NULL;
 191  191          Phdr            *dtrphdr = NULL;
 192  192          char            *interp = NULL;
 193  193          uintptr_t       lddata;
 194  194          long            execsz;
 195  195          intptr_t        minaddr;
 196  196  
 197  197          if (lddatap != NULL)
 198  198                  *lddatap = NULL;
 199  199  
 200  200          if (minaddrp != NULL)
 201  201                  *minaddrp = NULL;
 202  202  
 203  203          if (error = execpermissions(vp, &vat, args)) {
 204  204                  uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
 205  205                  return (error);
 206  206          }
 207  207  
 208  208          if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
 209  209              &nphdrs)) != 0 ||
 210  210              (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
 211  211              &phdrsize)) != 0) {
 212  212                  uprintf("%s: Cannot read %s\n", exec_file, args->pathname);
 213  213                  return (error);
 214  214          }
 215  215  
 216  216          if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
 217  217                  uprintf("%s: Nothing to load in %s", exec_file, args->pathname);
 218  218                  kmem_free(phdrbase, phdrsize);
 219  219                  return (ENOEXEC);
 220  220          }
 221  221          if (lddatap != NULL)
 222  222                  *lddatap = lddata;
 223  223  
 224  224          if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
 225  225              &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
 226  226              len, &execsz, brksize)) {
 227  227                  uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
 228  228                  if (uphdr != NULL && uphdr->p_flags == 0)
 229  229                          kmem_free(uphdr, sizeof (Phdr));
 230  230                  kmem_free(phdrbase, phdrsize);
 231  231                  return (error);
 232  232          }
 233  233  
 234  234          if (minaddrp != NULL)
 235  235                  *minaddrp = minaddr;
 236  236  
 237  237          /*
 238  238           * If the executable requires an interpreter, determine its name.
 239  239           */
 240  240          if (dynphdr != NULL) {
 241  241                  ssize_t resid;
 242  242  
 243  243                  if (dynphdr->p_filesz > MAXPATHLEN || dynphdr->p_filesz == 0) {
 244  244                          uprintf("%s: Invalid interpreter\n", exec_file);
 245  245                          kmem_free(phdrbase, phdrsize);
 246  246                          return (ENOEXEC);
 247  247                  }
 248  248  
 249  249                  interp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 250  250  
 251  251                  if ((error = vn_rdwr(UIO_READ, vp, interp, dynphdr->p_filesz,
 252  252                      (offset_t)dynphdr->p_offset, UIO_SYSSPACE, 0,
 253  253                      (rlim64_t)0, CRED(), &resid)) != 0 || resid != 0 ||
 254  254                      interp[dynphdr->p_filesz - 1] != '\0') {
 255  255                          uprintf("%s: Cannot obtain interpreter pathname\n",
 256  256                              exec_file);
 257  257                          kmem_free(interp, MAXPATHLEN);
 258  258                          kmem_free(phdrbase, phdrsize);
 259  259                          return (error != 0 ? error : ENOEXEC);
 260  260                  }
 261  261          }
 262  262  
 263  263          /*
 264  264           * If this is a statically linked executable, voffset should indicate
 265  265           * the address of the executable itself (it normally holds the address
 266  266           * of the interpreter).
 267  267           */
 268  268          if (ehdr->e_type == ET_EXEC && interp == NULL)
 269  269                  *voffset = minaddr;
 270  270  
 271  271          /*
 272  272           * If the caller has asked for the interpreter name, return it (it's
 273  273           * up to the caller to free it); if the caller hasn't asked for it,
 274  274           * free it ourselves.
 275  275           */
 276  276          if (interpp != NULL) {
 277  277                  *interpp = interp;
 278  278          } else if (interp != NULL) {
 279  279                  kmem_free(interp, MAXPATHLEN);
 280  280          }
 281  281  
 282  282          if (uphdr != NULL) {
 283  283                  *uphdr_vaddr = uphdr->p_vaddr;
 284  284  
 285  285                  if (uphdr->p_flags == 0)
 286  286                          kmem_free(uphdr, sizeof (Phdr));
 287  287          } else if (ehdr->e_type == ET_DYN) {
 288  288                  /*
 289  289                   * If we don't have a uphdr, we'll apply the logic found
 290  290                   * in mapelfexec() and use the p_vaddr of the first PT_LOAD
 291  291                   * section as the base address of the object.
 292  292                   */
 293  293                  Phdr *phdr = (Phdr *)phdrbase;
 294  294                  int i, hsize = ehdr->e_phentsize;
 295  295  
 296  296                  for (i = nphdrs; i > 0; i--) {
 297  297                          if (phdr->p_type == PT_LOAD) {
 298  298                                  *uphdr_vaddr = (uintptr_t)phdr->p_vaddr +
 299  299                                      ehdr->e_phoff;
 300  300                                  break;
 301  301                          }
 302  302  
 303  303                          phdr = (Phdr *)((caddr_t)phdr + hsize);
 304  304                  }
 305  305  
 306  306                  /*
 307  307                   * If we don't have a PT_LOAD segment, we should have returned
 308  308                   * ENOEXEC when elfsize() returned 0, above.
 309  309                   */
 310  310                  VERIFY(i > 0);
 311  311          } else {
 312  312                  *uphdr_vaddr = (Addr)-1;
 313  313          }
 314  314  
 315  315          kmem_free(phdrbase, phdrsize);
 316  316          return (error);
 317  317  }
 318  318  
 319  319  /*ARGSUSED*/
 320  320  int
 321  321  elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 322  322      int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
 323  323      int *brand_action)
 324  324  {
 325  325          caddr_t         phdrbase = NULL;
 326  326          caddr_t         bssbase = 0;
 327  327          caddr_t         brkbase = 0;
 328  328          size_t          brksize = 0;
 329  329          ssize_t         dlnsize, nsize = 0;
 330  330          aux_entry_t     *aux;
 331  331          int             error;
 332  332          ssize_t         resid;
 333  333          int             fd = -1;
 334  334          intptr_t        voffset;
 335  335          Phdr            *dyphdr = NULL;
 336  336          Phdr            *stphdr = NULL;
 337  337          Phdr            *uphdr = NULL;
 338  338          Phdr            *junk = NULL;
 339  339          size_t          len;
 340  340          ssize_t         phdrsize;
 341  341          int             postfixsize = 0;
 342  342          int             i, hsize;
 343  343          Phdr            *phdrp;
 344  344          Phdr            *dataphdrp = NULL;
 345  345          Phdr            *dtrphdr;
 346  346          Phdr            *capphdr = NULL;
 347  347          Cap             *cap = NULL;
 348  348          ssize_t         capsize;
 349  349          int             hasu = 0;
 350  350          int             hasauxv = 0;
 351  351          int             hasdy = 0;
 352  352          int             branded = 0;
 353  353          int             dynuphdr = 0;
 354  354  
 355  355          struct proc *p = ttoproc(curthread);
 356  356          struct user *up = PTOU(p);
 357  357          struct bigwad {
 358  358                  Ehdr    ehdr;
 359  359                  aux_entry_t     elfargs[__KERN_NAUXV_IMPL];
 360  360                  char            dl_name[MAXPATHLEN];
 361  361                  char            pathbuf[MAXPATHLEN];
 362  362                  struct vattr    vattr;
 363  363                  struct execenv  exenv;
 364  364          } *bigwad;      /* kmem_alloc this behemoth so we don't blow stack */
 365  365          Ehdr            *ehdrp;
 366  366          int             nshdrs, shstrndx, nphdrs;
 367  367          char            *dlnp;
 368  368          char            *pathbufp;
 369  369          rlim64_t        limit;
 370  370          rlim64_t        roundlimit;
 371  371  
 372  372          ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
 373  373  
 374  374          bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
 375  375          ehdrp = &bigwad->ehdr;
 376  376          dlnp = bigwad->dl_name;
 377  377          pathbufp = bigwad->pathbuf;
 378  378  
 379  379          /*
 380  380           * Obtain ELF and program header information.
 381  381           */
 382  382          if ((error = getelfhead(vp, CRED(), ehdrp, &nshdrs, &shstrndx,
 383  383              &nphdrs)) != 0 ||
 384  384              (error = getelfphdr(vp, CRED(), ehdrp, nphdrs, &phdrbase,
 385  385              &phdrsize)) != 0)
 386  386                  goto out;
 387  387  
 388  388          /*
 389  389           * Prevent executing an ELF file that has no entry point.
 390  390           */
 391  391          if (ehdrp->e_entry == 0) {
 392  392                  uprintf("%s: Bad entry point\n", exec_file);
 393  393                  goto bad;
 394  394          }
 395  395  
 396  396          /*
 397  397           * Put data model that we're exec-ing to into the args passed to
 398  398           * exec_args(), so it will know what it is copying to on new stack.
 399  399           * Now that we know whether we are exec-ing a 32-bit or 64-bit
 400  400           * executable, we can set execsz with the appropriate NCARGS.
 401  401           */
 402  402  #ifdef  _LP64
 403  403          if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) {
 404  404                  args->to_model = DATAMODEL_ILP32;
 405  405                  *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
 406  406          } else {
 407  407                  args->to_model = DATAMODEL_LP64;
 408  408                  if (!args->stk_prot_override) {
 409  409                          args->stk_prot &= ~PROT_EXEC;
 410  410                  }
 411  411  #if defined(__i386) || defined(__amd64)
 412  412                  args->dat_prot &= ~PROT_EXEC;
 413  413  #endif
 414  414                  *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1);
 415  415          }
 416  416  #else   /* _LP64 */
 417  417          args->to_model = DATAMODEL_ILP32;
 418  418          *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1);
 419  419  #endif  /* _LP64 */
 420  420  
 421  421          /*
 422  422           * We delay invoking the brand callback until we've figured out what
 423  423           * kind of elf binary we're trying to run, 32-bit or 64-bit.  We do this
 424  424           * because now the brand library can just check args->to_model to see if
  425  425           * the target is 32-bit or 64-bit without having to duplicate all the
 426  426           * code above.
 427  427           *
 428  428           * We also give the brand a chance to indicate that based on the ELF
 429  429           * OSABI of the target binary it should become unbranded and optionally
 430  430           * indicate that it should be treated as existing in a specific prefix.
 431  431           *
 432  432           * Note that if a brand opts to go down this route it does not actually
 433  433           * end up being debranded. In other words, future programs that exec
 434  434           * will still be considered for branding unless this escape hatch is
 435  435           * used. Consider the case of lx brand for example. If a user runs
 436  436           * /native/usr/sbin/dtrace -c /bin/ls, the isaexec and normal executable
 437  437           * of DTrace that's in /native will take this escape hatch and be run
 438  438           * and interpreted using the normal system call table; however, the
 439  439           * execution of a non-illumos binary in the form of /bin/ls will still
 440  440           * be branded and be subject to all of the normal actions of the brand.
 441  441           *
 442  442           * The level checks associated with brand handling below are used to
 443  443           * prevent a loop since the brand elfexec function typically comes back
 444  444           * through this function. We must check <= here since the nested
 445  445           * handling in the #! interpreter code will increment the level before
 446  446           * calling gexec to run the final elfexec interpreter.
 447  447           */
 448  448          if ((level <= INTP_MAXDEPTH) && (*brand_action != EBA_NATIVE) &&
 449  449              (PROC_IS_BRANDED(p)) && (BROP(p)->b_native_exec != NULL)) {
 450  450                  if (BROP(p)->b_native_exec(ehdrp->e_ident[EI_OSABI],
 451  451                      &args->brand_nroot) == B_TRUE) {
 452  452                          ASSERT(ehdrp->e_ident[EI_OSABI]);
 453  453                          *brand_action = EBA_NATIVE;
 454  454                          /* Add one for the trailing '/' in the path */
 455  455                          if (args->brand_nroot != NULL)
 456  456                                  nsize = strlen(args->brand_nroot) + 1;
 457  457                  }
 458  458          }
 459  459  
 460  460          if ((level <= INTP_MAXDEPTH) &&
 461  461              (*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
 462  462                  error = BROP(p)->b_elfexec(vp, uap, args,
 463  463                      idatap, level + 1, execsz, setid, exec_file, cred,
 464  464                      brand_action);
 465  465                  goto out;
 466  466          }
 467  467  
 468  468          /*
 469  469           * Determine aux size now so that stack can be built
 470  470           * in one shot (except actual copyout of aux image),
 471  471           * determine any non-default stack protections,
 472  472           * and still have this code be machine independent.
 473  473           */
 474  474          hsize = ehdrp->e_phentsize;
 475  475          phdrp = (Phdr *)phdrbase;
 476  476          for (i = nphdrs; i > 0; i--) {
 477  477                  switch (phdrp->p_type) {
 478  478                  case PT_INTERP:
 479  479                          hasauxv = hasdy = 1;
 480  480                          break;
 481  481                  case PT_PHDR:
 482  482                          hasu = 1;
 483  483                          break;
 484  484                  case PT_SUNWSTACK:
 485  485                          args->stk_prot = PROT_USER;
 486  486                          if (phdrp->p_flags & PF_R)
 487  487                                  args->stk_prot |= PROT_READ;
 488  488                          if (phdrp->p_flags & PF_W)
 489  489                                  args->stk_prot |= PROT_WRITE;
 490  490                          if (phdrp->p_flags & PF_X)
 491  491                                  args->stk_prot |= PROT_EXEC;
 492  492                          break;
 493  493                  case PT_LOAD:
 494  494                          dataphdrp = phdrp;
 495  495                          break;
 496  496                  case PT_SUNWCAP:
 497  497                          capphdr = phdrp;
 498  498                          break;
 499  499                  }
 500  500                  phdrp = (Phdr *)((caddr_t)phdrp + hsize);
 501  501          }
 502  502  
 503  503          if (ehdrp->e_type != ET_EXEC) {
 504  504                  dataphdrp = NULL;
 505  505                  hasauxv = 1;
 506  506          }
 507  507  
 508  508          /* Copy BSS permissions to args->dat_prot */
 509  509          if (dataphdrp != NULL) {
 510  510                  args->dat_prot = PROT_USER;
 511  511                  if (dataphdrp->p_flags & PF_R)
 512  512                          args->dat_prot |= PROT_READ;
 513  513                  if (dataphdrp->p_flags & PF_W)
 514  514                          args->dat_prot |= PROT_WRITE;
 515  515                  if (dataphdrp->p_flags & PF_X)
 516  516                          args->dat_prot |= PROT_EXEC;
 517  517          }
 518  518  
 519  519          /*
 520  520           * If a auxvector will be required - reserve the space for
 521  521           * it now.  This may be increased by exec_args if there are
  
    | 
      ↓ open down ↓ | 
    521 lines elided | 
    
      ↑ open up ↑ | 
  
 522  522           * ISA-specific types (included in __KERN_NAUXV_IMPL).
 523  523           */
 524  524          if (hasauxv) {
 525  525                  /*
 526  526                   * If a AUX vector is being built - the base AUX
 527  527                   * entries are:
 528  528                   *
 529  529                   *      AT_BASE
 530  530                   *      AT_FLAGS
 531  531                   *      AT_PAGESZ
 532      -                 *      AT_RANDOM       (added in stk_copyout)
      532 +                 *      AT_RANDOM
 533  533                   *      AT_SUN_AUXFLAGS
 534  534                   *      AT_SUN_HWCAP
 535  535                   *      AT_SUN_HWCAP2
 536      -                 *      AT_SUN_PLATFORM (added in stk_copyout)
 537      -                 *      AT_SUN_EXECNAME (added in stk_copyout)
      536 +                 *      AT_SUN_PLATFORM (added in stk_copyout)
      537 +                 *      AT_SUN_EXECNAME (added in stk_copyout)
 538  538                   *      AT_NULL
 539  539                   *
 540  540                   * total == 10
 541  541                   */
 542  542                  if (hasdy && hasu) {
 543  543                          /*
 544  544                           * Has PT_INTERP & PT_PHDR - the auxvectors that
 545  545                           * will be built are:
 546  546                           *
 547  547                           *      AT_PHDR
 548  548                           *      AT_PHENT
 549  549                           *      AT_PHNUM
 550  550                           *      AT_ENTRY
 551  551                           *      AT_LDDATA
 552  552                           *
 553  553                           * total = 5
 554  554                           */
 555  555                          args->auxsize = (10 + 5) * sizeof (aux_entry_t);
 556  556                  } else if (hasdy) {
 557  557                          /*
 558  558                           * Has PT_INTERP but no PT_PHDR
 559  559                           *
 560  560                           *      AT_EXECFD
 561  561                           *      AT_LDDATA
 562  562                           *
 563  563                           * total = 2
 564  564                           */
 565  565                          args->auxsize = (10 + 2) * sizeof (aux_entry_t);
 566  566                  } else {
 567  567                          args->auxsize = 10 * sizeof (aux_entry_t);
 568  568                  }
 569  569          } else {
 570  570                  args->auxsize = 0;
 571  571          }
 572  572  
 573  573          /*
 574  574           * If this binary is using an emulator, we need to add an
 575  575           * AT_SUN_EMULATOR aux entry.
 576  576           */
 577  577          if (args->emulator != NULL)
 578  578                  args->auxsize += sizeof (aux_entry_t);
 579  579  
 580  580          /*
 581  581           * If this is a native binary that's been given a modified interpreter
 582  582           * root, inform it that the native system exists at that root.
 583  583           */
 584  584          if (args->brand_nroot != NULL) {
 585  585                  args->auxsize += sizeof (aux_entry_t);
 586  586          }
 587  587  
 588  588  
 589  589          /*
 590  590           * On supported kernels (x86_64) make room in the auxv for the
 591  591           * AT_SUN_COMMPAGE entry.  This will go unpopulated on i86xpv systems
 592  592           * which do not provide such functionality.
 593  593           */
 594  594  #if defined(__amd64)
 595  595          args->auxsize += sizeof (aux_entry_t);
 596  596  #endif /* defined(__amd64) */
 597  597  
 598  598          /*
 599  599           * If we have user credentials, we'll supply the following entries:
 600  600           *      AT_SUN_UID
 601  601           *      AT_SUN_RUID
 602  602           *      AT_SUN_GID
 603  603           *      AT_SUN_RGID
 604  604           */
 605  605          if (cred != NULL) {
 606  606                  args->auxsize += 4 * sizeof (aux_entry_t);
 607  607          }
 608  608  
 609  609          if ((*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
 610  610                  branded = 1;
 611  611                  /*
 612  612                   * We will be adding 5 entries to the aux vectors.  One for
  613  613                   * the brandname and 4 for the brand specific aux vectors.
 614  614                   */
 615  615                  args->auxsize += 5 * sizeof (aux_entry_t);
 616  616          }
 617  617  
 618  618          /* Hardware/Software capabilities */
 619  619          if (capphdr != NULL &&
 620  620              (capsize = capphdr->p_filesz) > 0 &&
 621  621              capsize <= 16 * sizeof (*cap)) {
 622  622                  int ncaps = capsize / sizeof (*cap);
 623  623                  Cap *cp;
 624  624  
 625  625                  cap = kmem_alloc(capsize, KM_SLEEP);
 626  626                  if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
 627  627                      capsize, (offset_t)capphdr->p_offset,
 628  628                      UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
 629  629                          uprintf("%s: Cannot read capabilities section\n",
 630  630                              exec_file);
 631  631                          goto out;
 632  632                  }
 633  633                  for (cp = cap; cp < cap + ncaps; cp++) {
 634  634                          if (cp->c_tag == CA_SUNW_SF_1 &&
 635  635                              (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
  
    | 
      ↓ open down ↓ | 
    88 lines elided | 
    
      ↑ open up ↑ | 
  
 636  636                                  if (args->to_model == DATAMODEL_LP64)
 637  637                                          args->addr32 = 1;
 638  638                                  break;
 639  639                          }
 640  640                  }
 641  641          }
 642  642  
 643  643          aux = bigwad->elfargs;
 644  644          /*
 645  645           * Move args to the user's stack.
 646      -         * This can fill in the AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM
 647      -         * aux entries.
      646 +         * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
 648  647           */
 649  648          if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
 650  649                  if (error == -1) {
 651  650                          error = ENOEXEC;
 652  651                          goto bad;
 653  652                  }
 654  653                  goto out;
 655  654          }
 656  655          /* we're single threaded after this point */
 657  656  
 658  657          /*
 659  658           * If this is an ET_DYN executable (shared object),
 660  659           * determine its memory size so that mapelfexec() can load it.
 661  660           */
 662  661          if (ehdrp->e_type == ET_DYN)
 663  662                  len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
 664  663          else
 665  664                  len = 0;
 666  665  
 667  666          dtrphdr = NULL;
 668  667  
 669  668          if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr,
 670  669              &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
 671  670              len, execsz, &brksize)) != 0)
 672  671                  goto bad;
 673  672  
 674  673          if (uphdr != NULL) {
 675  674                  /*
 676  675                   * Our uphdr has been dynamically allocated if (and only if)
 677  676                   * its program header flags are clear.
 678  677                   */
 679  678                  dynuphdr = (uphdr->p_flags == 0);
 680  679          }
 681  680  
 682  681          if (uphdr != NULL && dyphdr == NULL)
 683  682                  goto bad;
 684  683  
 685  684          if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
 686  685                  uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
 687  686                  goto bad;
 688  687          }
 689  688  
 690  689          if (dyphdr != NULL) {
 691  690                  size_t          len;
 692  691                  uintptr_t       lddata;
 693  692                  char            *p;
 694  693                  struct vnode    *nvp;
 695  694  
 696  695                  dlnsize = dyphdr->p_filesz + nsize;
 697  696  
 698  697                  if (dlnsize > MAXPATHLEN || dlnsize <= 0)
 699  698                          goto bad;
 700  699  
 701  700                  if (nsize != 0) {
 702  701                          bcopy(args->brand_nroot, dlnp, nsize - 1);
 703  702                          dlnp[nsize - 1] = '/';
 704  703                  }
 705  704  
 706  705                  /*
 707  706                   * Read in "interpreter" pathname.
 708  707                   */
 709  708                  if ((error = vn_rdwr(UIO_READ, vp, dlnp + nsize,
 710  709                      dyphdr->p_filesz, (offset_t)dyphdr->p_offset, UIO_SYSSPACE,
 711  710                      0, (rlim64_t)0, CRED(), &resid)) != 0) {
 712  711                          uprintf("%s: Cannot obtain interpreter pathname\n",
 713  712                              exec_file);
 714  713                          goto bad;
 715  714                  }
 716  715  
 717  716                  if (resid != 0 || dlnp[dlnsize - 1] != '\0')
 718  717                          goto bad;
 719  718  
 720  719                  /*
 721  720                   * Search for '$ORIGIN' token in interpreter path.
 722  721                   * If found, expand it.
 723  722                   */
 724  723                  for (p = dlnp; p = strchr(p, '$'); ) {
 725  724                          uint_t  len, curlen;
 726  725                          char    *_ptr;
 727  726  
 728  727                          if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
 729  728                                  continue;
 730  729  
 731  730                          /*
 732  731                           * We don't support $ORIGIN on setid programs to close
 733  732                           * a potential attack vector.
 734  733                           */
 735  734                          if ((setid & EXECSETID_SETID) != 0) {
 736  735                                  error = ENOEXEC;
 737  736                                  goto bad;
 738  737                          }
 739  738  
 740  739                          curlen = 0;
 741  740                          len = p - dlnp - 1;
 742  741                          if (len) {
 743  742                                  bcopy(dlnp, pathbufp, len);
 744  743                                  curlen += len;
 745  744                          }
 746  745                          if (_ptr = strrchr(args->pathname, '/')) {
 747  746                                  len = _ptr - args->pathname;
 748  747                                  if ((curlen + len) > MAXPATHLEN)
 749  748                                          break;
 750  749  
 751  750                                  bcopy(args->pathname, &pathbufp[curlen], len);
 752  751                                  curlen += len;
 753  752                          } else {
 754  753                                  /*
 755  754                                   * executable is a basename found in the
 756  755                                   * current directory.  So - just substitue
 757  756                                   * '.' for ORIGIN.
 758  757                                   */
 759  758                                  pathbufp[curlen] = '.';
 760  759                                  curlen++;
 761  760                          }
 762  761                          p += ORIGIN_STR_SIZE;
 763  762                          len = strlen(p);
 764  763  
 765  764                          if ((curlen + len) > MAXPATHLEN)
 766  765                                  break;
 767  766                          bcopy(p, &pathbufp[curlen], len);
 768  767                          curlen += len;
 769  768                          pathbufp[curlen++] = '\0';
 770  769                          bcopy(pathbufp, dlnp, curlen);
 771  770                  }
 772  771  
 773  772                  /*
 774  773                   * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
 775  774                   * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
 776  775                   * Just in case /usr is not mounted, change it now.
 777  776                   */
 778  777                  if (strcmp(dlnp, USR_LIB_RTLD) == 0)
 779  778                          dlnp += 4;
 780  779                  error = lookupname(dlnp, UIO_SYSSPACE, FOLLOW, NULLVPP, &nvp);
 781  780                  if (error && dlnp != bigwad->dl_name) {
 782  781                          /* new kernel, old user-level */
 783  782                          error = lookupname(dlnp -= 4, UIO_SYSSPACE, FOLLOW,
 784  783                              NULLVPP, &nvp);
 785  784                  }
 786  785                  if (error) {
 787  786                          uprintf("%s: Cannot find %s\n", exec_file, dlnp);
 788  787                          goto bad;
 789  788                  }
 790  789  
 791  790                  /*
 792  791                   * Setup the "aux" vector.
 793  792                   */
 794  793                  if (uphdr) {
 795  794                          if (ehdrp->e_type == ET_DYN) {
 796  795                                  /* don't use the first page */
 797  796                                  bigwad->exenv.ex_brkbase = (caddr_t)PAGESIZE;
 798  797                                  bigwad->exenv.ex_bssbase = (caddr_t)PAGESIZE;
 799  798                          } else {
 800  799                                  bigwad->exenv.ex_bssbase = bssbase;
 801  800                                  bigwad->exenv.ex_brkbase = brkbase;
 802  801                          }
 803  802                          bigwad->exenv.ex_brksize = brksize;
 804  803                          bigwad->exenv.ex_magic = elfmagic;
 805  804                          bigwad->exenv.ex_vp = vp;
 806  805                          setexecenv(&bigwad->exenv);
 807  806  
 808  807                          ADDAUX(aux, AT_PHDR, uphdr->p_vaddr + voffset)
 809  808                          ADDAUX(aux, AT_PHENT, ehdrp->e_phentsize)
 810  809                          ADDAUX(aux, AT_PHNUM, nphdrs)
 811  810                          ADDAUX(aux, AT_ENTRY, ehdrp->e_entry + voffset)
 812  811                  } else {
 813  812                          if ((error = execopen(&vp, &fd)) != 0) {
 814  813                                  VN_RELE(nvp);
 815  814                                  goto bad;
 816  815                          }
 817  816  
 818  817                          ADDAUX(aux, AT_EXECFD, fd)
 819  818                  }
 820  819  
 821  820                  if ((error = execpermissions(nvp, &bigwad->vattr, args)) != 0) {
 822  821                          VN_RELE(nvp);
 823  822                          uprintf("%s: Cannot execute %s\n", exec_file, dlnp);
 824  823                          goto bad;
 825  824                  }
 826  825  
 827  826                  /*
 828  827                   * Now obtain the ELF header along with the entire program
 829  828                   * header contained in "nvp".
 830  829                   */
 831  830                  kmem_free(phdrbase, phdrsize);
 832  831                  phdrbase = NULL;
 833  832                  if ((error = getelfhead(nvp, CRED(), ehdrp, &nshdrs,
 834  833                      &shstrndx, &nphdrs)) != 0 ||
 835  834                      (error = getelfphdr(nvp, CRED(), ehdrp, nphdrs, &phdrbase,
 836  835                      &phdrsize)) != 0) {
 837  836                          VN_RELE(nvp);
 838  837                          uprintf("%s: Cannot read %s\n", exec_file, dlnp);
 839  838                          goto bad;
 840  839                  }
 841  840  
 842  841                  /*
 843  842                   * Determine memory size of the "interpreter's" loadable
 844  843                   * sections.  This size is then used to obtain the virtual
 845  844                   * address of a hole, in the user's address space, large
 846  845                   * enough to map the "interpreter".
 847  846                   */
 848  847                  if ((len = elfsize(ehdrp, nphdrs, phdrbase, &lddata)) == 0) {
 849  848                          VN_RELE(nvp);
 850  849                          uprintf("%s: Nothing to load in %s\n", exec_file, dlnp);
 851  850                          goto bad;
 852  851                  }
 853  852  
 854  853                  dtrphdr = NULL;
 855  854  
 856  855                  error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, NULL, &junk,
 857  856                      &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
 858  857                      execsz, NULL);
 859  858  
 860  859                  if (error || junk != NULL) {
 861  860                          VN_RELE(nvp);
 862  861                          uprintf("%s: Cannot map %s\n", exec_file, dlnp);
 863  862                          goto bad;
 864  863                  }
 865  864  
 866  865                  /*
 867  866                   * We use the DTrace program header to initialize the
 868  867                   * architecture-specific user per-LWP location. The dtrace
 869  868                   * fasttrap provider requires ready access to per-LWP scratch
 870  869                   * space. We assume that there is only one such program header
 871  870                   * in the interpreter.
 872  871                   */
 873  872                  if (dtrphdr != NULL &&
 874  873                      dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
 875  874                          VN_RELE(nvp);
 876  875                          uprintf("%s: Bad DTrace phdr in %s\n", exec_file, dlnp);
 877  876                          goto bad;
  
    | 
      ↓ open down ↓ | 
    220 lines elided | 
    
      ↑ open up ↑ | 
  
 878  877                  }
 879  878  
 880  879                  VN_RELE(nvp);
 881  880                  ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata)
 882  881          }
 883  882  
 884  883          if (hasauxv) {
 885  884                  int auxf = AF_SUN_HWCAPVERIFY;
 886  885  
 887  886                  /*
 888      -                 * Note: AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM were
 889      -                 * filled in via exec_args()
      887 +                 * Note: AT_SUN_PLATFORM and AT_RANDOM were filled in via
      888 +                 * exec_args()
 890  889                   */
 891  890                  ADDAUX(aux, AT_BASE, voffset)
 892  891                  ADDAUX(aux, AT_FLAGS, at_flags)
 893  892                  ADDAUX(aux, AT_PAGESZ, PAGESIZE)
 894  893                  /*
 895  894                   * Linker flags. (security)
 896  895                   * p_flag not yet set at this time.
 897  896                   * We rely on gexec() to provide us with the information.
 898  897                   * If the application is set-uid but this is not reflected
 899  898                   * in a mismatch between real/effective uids/gids, then
 900  899                   * don't treat this as a set-uid exec.  So we care about
 901  900                   * the EXECSETID_UGIDS flag but not the ...SETID flag.
 902  901                   */
 903  902                  if ((setid &= ~EXECSETID_SETID) != 0)
 904  903                          auxf |= AF_SUN_SETUGID;
 905  904  
 906  905                  /*
 907  906                   * If we're running a native process from within a branded
 908  907                   * zone under pfexec then we clear the AF_SUN_SETUGID flag so
 909  908                   * that the native ld.so.1 is able to link with the native
 910  909                   * libraries instead of using the brand libraries that are
 911  910                   * installed in the zone.  We only do this for processes
 912  911                   * which we trust because we see they are already running
 913  912                   * under pfexec (where uid != euid).  This prevents a
 914  913                   * malicious user within the zone from crafting a wrapper to
 915  914                   * run native suid commands with unsecure libraries interposed.
 916  915                   */
 917  916                  if ((*brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
 918  917                      (setid &= ~EXECSETID_SETID) != 0))
 919  918                          auxf &= ~AF_SUN_SETUGID;
 920  919  
 921  920                  /*
 922  921                   * Record the user addr of the auxflags aux vector entry
 923  922                   * since brands may optionally want to manipulate this field.
 924  923                   */
 925  924                  args->auxp_auxflags =
 926  925                      (char *)((char *)args->stackend +
 927  926                      ((char *)&aux->a_type -
 928  927                      (char *)bigwad->elfargs));
 929  928                  ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
 930  929  
 931  930                  /*
 932  931                   * Record information about the real and effective user and
 933  932                   * group IDs.
 934  933                   */
 935  934                  if (cred != NULL) {
 936  935                          ADDAUX(aux, AT_SUN_UID, crgetuid(cred));
 937  936                          ADDAUX(aux, AT_SUN_RUID, crgetruid(cred));
 938  937                          ADDAUX(aux, AT_SUN_GID, crgetgid(cred));
 939  938                          ADDAUX(aux, AT_SUN_RGID, crgetrgid(cred));
 940  939                  }
 941  940  
 942  941                  /*
 943  942                   * Hardware capability flag word (performance hints)
 944  943                   * Used for choosing faster library routines.
 945  944                   * (Potentially different between 32-bit and 64-bit ABIs)
 946  945                   */
 947  946  #if defined(_LP64)
 948  947                  if (args->to_model == DATAMODEL_NATIVE) {
 949  948                          ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 950  949                          ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
 951  950                  } else {
 952  951                          ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
 953  952                          ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
 954  953                  }
 955  954  #else
 956  955                  ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 957  956                  ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
 958  957  #endif
 959  958                  if (branded) {
 960  959                          /*
 961  960                           * Reserve space for the brand-private aux vectors,
 962  961                           * and record the user addr of that space.
 963  962                           */
 964  963                          args->auxp_brand =
 965  964                              (char *)((char *)args->stackend +
 966  965                              ((char *)&aux->a_type -
 967  966                              (char *)bigwad->elfargs));
 968  967                          ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
 969  968                          ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
 970  969                          ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
 971  970                          ADDAUX(aux, AT_SUN_BRAND_AUX4, 0)
 972  971                  }
 973  972  
 974  973                  /*
 975  974                   * Add the comm page auxv entry, mapping it in if needed.
 976  975                   */
 977  976  #if defined(__amd64)
 978  977                  if (args->commpage != NULL ||
 979  978                      (args->commpage = (uintptr_t)comm_page_mapin()) != NULL) {
 980  979                          ADDAUX(aux, AT_SUN_COMMPAGE, args->commpage)
 981  980                  } else {
 982  981                          /*
 983  982                           * If the comm page cannot be mapped, pad out the auxv
 984  983                           * to satisfy later size checks.
 985  984                           */
 986  985                          ADDAUX(aux, AT_NULL, 0)
 987  986                  }
 988  987  #endif /* defined(__amd64) */
 989  988  
 990  989                  ADDAUX(aux, AT_NULL, 0)
 991  990                  postfixsize = (char *)aux - (char *)bigwad->elfargs;
 992  991  
 993  992                  /*
 994  993                   * We make assumptions above when we determine how many aux
 995  994                   * vector entries we will be adding. However, if we have an
 996  995                   * invalid elf file, it is possible that mapelfexec might
 997  996                   * behave differently (but not return an error), in which case
 998  997                   * the number of aux entries we actually add will be different.
 999  998                   * We detect that now and error out.
1000  999                   */
1001 1000                  if (postfixsize != args->auxsize) {
1002 1001                          DTRACE_PROBE2(elfexec_badaux, int, postfixsize,
1003 1002                              int, args->auxsize);
1004 1003                          goto bad;
1005 1004                  }
1006 1005                  ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
1007 1006          }
1008 1007  
1009 1008          /*
1010 1009           * For the 64-bit kernel, the limit is big enough that rounding it up
1011 1010           * to a page can overflow the 64-bit limit, so we check for btopr()
1012 1011           * overflowing here by comparing it with the unrounded limit in pages.
1013 1012           * If it hasn't overflowed, compare the exec size with the rounded up
1014 1013           * limit in pages.  Otherwise, just compare with the unrounded limit.
1015 1014           */
1016 1015          limit = btop(p->p_vmem_ctl);
1017 1016          roundlimit = btopr(p->p_vmem_ctl);
1018 1017          if ((roundlimit > limit && *execsz > roundlimit) ||
1019 1018              (roundlimit < limit && *execsz > limit)) {
1020 1019                  mutex_enter(&p->p_lock);
1021 1020                  (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
1022 1021                      RCA_SAFE);
1023 1022                  mutex_exit(&p->p_lock);
1024 1023                  error = ENOMEM;
1025 1024                  goto bad;
1026 1025          }
1027 1026  
1028 1027          bzero(up->u_auxv, sizeof (up->u_auxv));
1029 1028          up->u_commpagep = args->commpage;
1030 1029          if (postfixsize) {
1031 1030                  int num_auxv;
1032 1031  
1033 1032                  /*
1034 1033                   * Copy the aux vector to the user stack.
1035 1034                   */
1036 1035                  error = execpoststack(args, bigwad->elfargs, postfixsize);
1037 1036                  if (error)
1038 1037                          goto bad;
1039 1038  
1040 1039                  /*
1041 1040                   * Copy auxv to the process's user structure for use by /proc.
1042 1041                   * If this is a branded process, the brand's exec routine will
1043 1042                   * copy its private entries to the user structure later. It
1044 1043                   * relies on the fact that the blank entries are at the end.
1045 1044                   */
1046 1045                  num_auxv = postfixsize / sizeof (aux_entry_t);
1047 1046                  ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
1048 1047                  aux = bigwad->elfargs;
1049 1048                  for (i = 0; i < num_auxv; i++) {
1050 1049                          up->u_auxv[i].a_type = aux[i].a_type;
1051 1050                          up->u_auxv[i].a_un.a_val = (aux_val_t)aux[i].a_un.a_val;
1052 1051                  }
1053 1052          }
1054 1053  
1055 1054          /*
1056 1055           * Pass back the starting address so we can set the program counter.
1057 1056           */
1058 1057          args->entry = (uintptr_t)(ehdrp->e_entry + voffset);
1059 1058  
1060 1059          if (!uphdr) {
1061 1060                  if (ehdrp->e_type == ET_DYN) {
1062 1061                          /*
1063 1062                           * If we are executing a shared library which doesn't
1064 1063                           * have an interpreter (probably ld.so.1) then
1065 1064                           * we don't set the brkbase now.  Instead we
1066 1065                           * delay its setting until the first call
1067 1066                           * via grow.c::brk().  This permits ld.so.1 to
1068 1067                           * initialize brkbase to the tail of the executable it
1069 1068                           * loads (which is where it needs to be).
1070 1069                           */
1071 1070                          bigwad->exenv.ex_brkbase = (caddr_t)0;
1072 1071                          bigwad->exenv.ex_bssbase = (caddr_t)0;
1073 1072                          bigwad->exenv.ex_brksize = 0;
1074 1073                  } else {
1075 1074                          bigwad->exenv.ex_brkbase = brkbase;
1076 1075                          bigwad->exenv.ex_bssbase = bssbase;
1077 1076                          bigwad->exenv.ex_brksize = brksize;
1078 1077                  }
1079 1078                  bigwad->exenv.ex_magic = elfmagic;
1080 1079                  bigwad->exenv.ex_vp = vp;
1081 1080                  setexecenv(&bigwad->exenv);
1082 1081          }
1083 1082  
1084 1083          ASSERT(error == 0);
1085 1084          goto out;
1086 1085  
1087 1086  bad:
1088 1087          if (fd != -1)           /* did we open the a.out yet */
1089 1088                  (void) execclose(fd);
1090 1089  
1091 1090          psignal(p, SIGKILL);
1092 1091  
1093 1092          if (error == 0)
1094 1093                  error = ENOEXEC;
1095 1094  out:
1096 1095          if (dynuphdr)
1097 1096                  kmem_free(uphdr, sizeof (Phdr));
1098 1097          if (phdrbase != NULL)
1099 1098                  kmem_free(phdrbase, phdrsize);
1100 1099          if (cap != NULL)
1101 1100                  kmem_free(cap, capsize);
1102 1101          kmem_free(bigwad, sizeof (struct bigwad));
1103 1102          return (error);
1104 1103  }
1105 1104  
1106 1105  /*
1107 1106   * Compute the memory size requirement for the ELF file.
            *
            * Walks nphdrs program headers starting at phdrbase, stepping by
            * ehdrp->e_phentsize bytes, and tracks the lowest p_vaddr and the
            * highest p_vaddr + p_memsz seen across PT_LOAD entries.  Returns
            * hiaddr - (loaddr & PAGEMASK), rounded up with roundup() to PAGESIZE.
            * With no PT_LOAD entries both bounds stay 0.
            *
            * If lddata is non-NULL, *lddata is set to the p_vaddr of the first
            * PT_LOAD entry that has PF_W set; it is not written when no such
            * entry exists, so callers cannot rely on it being initialized here.
1108 1107   */
1109 1108  static size_t
1110 1109  elfsize(Ehdr *ehdrp, int nphdrs, caddr_t phdrbase, uintptr_t *lddata)
1111 1110  {
1112 1111          size_t  len;
1113 1112          Phdr    *phdrp = (Phdr *)phdrbase;
1114 1113          int     hsize = ehdrp->e_phentsize;     /* stride between entries */
1115 1114          int     first = 1;      /* no PT_LOAD entry seen yet */
1116 1115          int     dfirst = 1;     /* first data segment */
1117 1116          uintptr_t loaddr = 0;
1118 1117          uintptr_t hiaddr = 0;
1119 1118          uintptr_t lo, hi;
1120 1119          int     i;
1121 1120  
1122 1121          for (i = nphdrs; i > 0; i--) {
1123 1122                  if (phdrp->p_type == PT_LOAD) {
1124 1123                          lo = phdrp->p_vaddr;
                                   /*
                                    * NOTE(review): lo + p_memsz is not checked for
                                    * wrap-around.
                                    */
1125 1124                          hi = lo + phdrp->p_memsz;
1126 1125                          if (first) {
1127 1126                                  loaddr = lo;
1128 1127                                  hiaddr = hi;
1129 1128                                  first = 0;
1130 1129                          } else {
1131 1130                                  if (loaddr > lo)
1132 1131                                          loaddr = lo;
1133 1132                                  if (hiaddr < hi)
1134 1133                                          hiaddr = hi;
1135 1134                          }
1136 1135  
1137 1136                          /*
1138 1137                           * Save the address of the first writable (PF_W)
1139 1138                           * PT_LOAD segment of an object - the caller uses it
1140 1139                           * for the AT_SUN_LDDATA aux entry.
1141 1140                           */
1142 1141                          if ((lddata != NULL) && dfirst &&
1143 1142                              (phdrp->p_flags & PF_W)) {
1144 1143                                  *lddata = lo;
1145 1144                                  dfirst = 0;
1146 1145                          }
1147 1146                  }
                           /* Advance by the file's declared entry size. */
1148 1147                  phdrp = (Phdr *)((caddr_t)phdrp + hsize);
1149 1148          }
1150 1149  
1151 1150          len = hiaddr - (loaddr & PAGEMASK);
1152 1151          len = roundup(len, PAGESIZE);
1153 1152  
1154 1153          return (len);
1155 1154  }
1156 1155  
1157 1156  /*
1158 1157   * Read in and validate the ELF header.
            *
            * The header is read from offset 0 of vp into *ehdr.  On success the
            * section header count, section name string table index and program
            * header count are returned through nshdrs, shstrndx and nphdrs, taking
            * the values from section header 0 when the header fields hold their
            * extended-numbering sentinels.  The program header table itself is
            * not read here.
            *
            * Returns 0 on success or the vn_rdwr() error if a read fails.
1159 1158   * SUSV3 requires:
1160 1159   *      ENOEXEC File format is not recognized
1161 1160   *      EINVAL  Format recognized but execution not supported
            * Here ENOEXEC covers a short read or bad EI_MAG2/EI_MAG3; EINVAL covers
            * an e_type other than ET_EXEC/ET_DYN, the wrong EI_CLASS for this
            * build, an elfheadcheck() failure, or a needed section header 0 with
            * e_shoff == 0.
1162 1161   */
1163 1162  static int
1164 1163  getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, int *nshdrs, int *shstrndx,
1165 1164      int *nphdrs)
1166 1165  {
1167 1166          int error;
1168 1167          ssize_t resid;
1169 1168  
1170 1169          /*
1171 1170           * We got here by the first two bytes in ident,
1172 1171           * now read the entire ELF header.
1173 1172           */
1174 1173          if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr,
1175 1174              sizeof (Ehdr), (offset_t)0, UIO_SYSSPACE, 0,
1176 1175              (rlim64_t)0, credp, &resid)) != 0)
1177 1176                  return (error);
1178 1177  
1179 1178          /*
1180 1179           * Since a separate version is compiled for handling 32-bit and
1181 1180           * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
1182 1181           * doesn't need to be able to deal with 32-bit ELF files.
1183 1182           */
1184 1183          if (resid != 0 ||
1185 1184              ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
1186 1185              ehdr->e_ident[EI_MAG3] != ELFMAG3)
1187 1186                  return (ENOEXEC);
1188 1187  
1189 1188          if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ||
1190 1189  #if defined(_ILP32) || defined(_ELF32_COMPAT)
1191 1190              ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
1192 1191  #else
1193 1192              ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
1194 1193  #endif
1195 1194              !elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine,
1196 1195              ehdr->e_flags))
1197 1196                  return (EINVAL);
1198 1197  
1199 1198          *nshdrs = ehdr->e_shnum;
1200 1199          *shstrndx = ehdr->e_shstrndx;
1201 1200          *nphdrs = ehdr->e_phnum;
1202 1201  
1203 1202          /*
1204 1203           * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
1205 1204           * to read in the section header at index zero to access the true
1206 1205           * values for those fields.
1207 1206           */
1208 1207          if ((*nshdrs == 0 && ehdr->e_shoff != 0) ||
1209 1208              *shstrndx == SHN_XINDEX || *nphdrs == PN_XNUM) {
1210 1209                  Shdr shdr;
1211 1210  
1212 1211                  if (ehdr->e_shoff == 0)
1213 1212                          return (EINVAL);
1214 1213  
                           /*
                            * NOTE(review): resid is not examined after this read, so
                            * a short read of section header 0 is not detected here.
                            */
1215 1214                  if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr,
1216 1215                      sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0,
1217 1216                      (rlim64_t)0, credp, &resid)) != 0)
1218 1217                          return (error);
1219 1218  
1220 1219                  if (*nshdrs == 0)
1221 1220                          *nshdrs = shdr.sh_size;
1222 1221                  if (*shstrndx == SHN_XINDEX)
1223 1222                          *shstrndx = shdr.sh_link;
                           /* A zero sh_info leaves *nphdrs at PN_XNUM. */
1224 1223                  if (*nphdrs == PN_XNUM && shdr.sh_info != 0)
1225 1224                          *nphdrs = shdr.sh_info;
1226 1225          }
1227 1226  
1228 1227          return (0);
1229 1228  }
1230 1229  
1231 1230  #ifdef _ELF32_COMPAT
1232 1231  extern size_t elf_nphdr_max;
1233 1232  #else
1234 1233  size_t elf_nphdr_max = 1000;     /* KM_NOSLEEP threshold in getelfphdr() */
1235 1234  #endif
1236 1235  
           /*
            * Read the program header table of vp into a newly allocated buffer.
            *
            * *phsizep is set to nphdrs * ehdr->e_phentsize and *phbasep to a
            * kmem_alloc()'d buffer of that size, filled from file offset
            * ehdr->e_phoff.  A table larger than sizeof (Phdr) * elf_nphdr_max is
            * allocated with KM_NOSLEEP, so that allocation can fail; anything
            * smaller uses KM_SLEEP.
            *
            * Returns 0 on success; EINVAL if e_phentsize is too small or not a
            * multiple of 4; ENOMEM if the KM_NOSLEEP allocation fails; otherwise
            * the vn_rdwr() error, in which case the buffer is freed and *phbasep
            * is set to NULL.  The buffer is not freed here on success; elfexec()
            * releases it with kmem_free(phdrbase, phdrsize).
            */
1237 1236  static int
1238 1237  getelfphdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, int nphdrs,
1239 1238      caddr_t *phbasep, ssize_t *phsizep)
1240 1239  {
1241 1240          ssize_t resid, minsize;
1242 1241          int err;
1243 1242  
1244 1243          /*
1245 1244           * Since we're going to be using e_phentsize to iterate down the
1246 1245           * array of program headers, it must be suitably aligned or else
1247 1246           * we might cause a misaligned access. We use all members through
1248 1247           * p_flags on 32-bit ELF files and p_memsz on 64-bit ELF files so
1249 1248           * e_phentsize must be at least large enough to include those
1250 1249           * members.
                    *
                    * NOTE(review): the check below enforces a multiple of 4 bytes,
                    * not 8 - confirm that is sufficient for 64-bit Phdr members.
1251 1250           */
1252 1251  #if !defined(_LP64) || defined(_ELF32_COMPAT)
1253 1252          minsize = offsetof(Phdr, p_flags) + sizeof (((Phdr *)NULL)->p_flags);
1254 1253  #else
1255 1254          minsize = offsetof(Phdr, p_memsz) + sizeof (((Phdr *)NULL)->p_memsz);
1256 1255  #endif
1257 1256          if (ehdr->e_phentsize < minsize || (ehdr->e_phentsize & 3))
1258 1257                  return (EINVAL);
1259 1258  
                   /*
                    * NOTE(review): nphdrs is an int and the product is not
                    * range-checked before being used as an allocation size.
                    */
1260 1259          *phsizep = nphdrs * ehdr->e_phentsize;
1261 1260  
                   /* Unusually large tables must not block waiting for memory. */
1262 1261          if (*phsizep > sizeof (Phdr) * elf_nphdr_max) {
1263 1262                  if ((*phbasep = kmem_alloc(*phsizep, KM_NOSLEEP)) == NULL)
1264 1263                          return (ENOMEM);
1265 1264          } else {
1266 1265                  *phbasep = kmem_alloc(*phsizep, KM_SLEEP);
1267 1266          }
1268 1267  
                   /*
                    * NOTE(review): resid is not examined after this read, so a
                    * short read of the table is not detected here.
                    */
1269 1268          if ((err = vn_rdwr(UIO_READ, vp, *phbasep, *phsizep,
1270 1269              (offset_t)ehdr->e_phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1271 1270              credp, &resid)) != 0) {
1272 1271                  kmem_free(*phbasep, *phsizep);
1273 1272                  *phbasep = NULL;
1274 1273                  return (err);
1275 1274          }
1276 1275  
1277 1276          return (0);
1278 1277  }
1279 1278  
1280 1279  #ifdef _ELF32_COMPAT
1281 1280  extern size_t elf_nshdr_max;
1282 1281  extern size_t elf_shstrtab_max;
1283 1282  #else
           /*
            * Allocation thresholds used by getelfshdr(): a section header table
            * larger than sizeof (Shdr) * elf_nshdr_max, or a section name string
            * table larger than elf_shstrtab_max bytes, is allocated with
            * KM_NOSLEEP instead of KM_SLEEP.  Builds with _ELF32_COMPAT declare
            * these extern rather than defining them.
            */
1284 1283  size_t elf_nshdr_max = 10000;
1285 1284  size_t elf_shstrtab_max = 100 * 1024;
1286 1285  #endif
1287 1286  
1288 1287  
1289 1288  static int
1290 1289  getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr,
1291 1290      int nshdrs, int shstrndx, caddr_t *shbasep, ssize_t *shsizep,
1292 1291      char **shstrbasep, ssize_t *shstrsizep)
1293 1292  {
1294 1293          ssize_t resid, minsize;
1295 1294          int err;
1296 1295          Shdr *shdr;
1297 1296  
1298 1297          /*
1299 1298           * Since we're going to be using e_shentsize to iterate down the
1300 1299           * array of section headers, it must be 8-byte aligned or else
1301 1300           * a we might cause a misaligned access. We use all members through
1302 1301           * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
1303 1302           * must be at least large enough to include that member. The index
1304 1303           * of the string table section must also be valid.
1305 1304           */
1306 1305          minsize = offsetof(Shdr, sh_entsize) + sizeof (shdr->sh_entsize);
1307 1306          if (ehdr->e_shentsize < minsize || (ehdr->e_shentsize & 3) ||
1308 1307              shstrndx >= nshdrs)
1309 1308                  return (EINVAL);
1310 1309  
1311 1310          *shsizep = nshdrs * ehdr->e_shentsize;
1312 1311  
1313 1312          if (*shsizep > sizeof (Shdr) * elf_nshdr_max) {
1314 1313                  if ((*shbasep = kmem_alloc(*shsizep, KM_NOSLEEP)) == NULL)
1315 1314                          return (ENOMEM);
1316 1315          } else {
1317 1316                  *shbasep = kmem_alloc(*shsizep, KM_SLEEP);
1318 1317          }
1319 1318  
1320 1319          if ((err = vn_rdwr(UIO_READ, vp, *shbasep, *shsizep,
1321 1320              (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1322 1321              credp, &resid)) != 0) {
1323 1322                  kmem_free(*shbasep, *shsizep);
1324 1323                  return (err);
1325 1324          }
1326 1325  
1327 1326          /*
1328 1327           * Pull the section string table out of the vnode; fail if the size
1329 1328           * is zero.
1330 1329           */
1331 1330          shdr = (Shdr *)(*shbasep + shstrndx * ehdr->e_shentsize);
1332 1331          if ((*shstrsizep = shdr->sh_size) == 0) {
1333 1332                  kmem_free(*shbasep, *shsizep);
1334 1333                  return (EINVAL);
1335 1334          }
1336 1335  
1337 1336          if (*shstrsizep > elf_shstrtab_max) {
1338 1337                  if ((*shstrbasep = kmem_alloc(*shstrsizep,
1339 1338                      KM_NOSLEEP)) == NULL) {
1340 1339                          kmem_free(*shbasep, *shsizep);
1341 1340                          return (ENOMEM);
1342 1341                  }
1343 1342          } else {
1344 1343                  *shstrbasep = kmem_alloc(*shstrsizep, KM_SLEEP);
1345 1344          }
1346 1345  
1347 1346          if ((err = vn_rdwr(UIO_READ, vp, *shstrbasep, *shstrsizep,
1348 1347              (offset_t)shdr->sh_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
1349 1348              credp, &resid)) != 0) {
1350 1349                  kmem_free(*shbasep, *shsizep);
1351 1350                  kmem_free(*shstrbasep, *shstrsizep);
1352 1351                  return (err);
1353 1352          }
1354 1353  
1355 1354          /*
1356 1355           * Make sure the strtab is null-terminated to make sure we
1357 1356           * don't run off the end of the table.
1358 1357           */
1359 1358          (*shstrbasep)[*shstrsizep - 1] = '\0';
1360 1359  
1361 1360          return (0);
1362 1361  }
1363 1362  
1364 1363  
1365 1364  #ifdef _ELF32_COMPAT
1366 1365  int
1367 1366  elf32readhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
1368 1367      caddr_t *phbasep, ssize_t *phsizep)
1369 1368  #else
1370 1369  int
1371 1370  elfreadhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
1372 1371      caddr_t *phbasep, ssize_t *phsizep)
1373 1372  #endif
1374 1373  {
1375 1374          int error, nshdrs, shstrndx;
1376 1375  
1377 1376          if ((error = getelfhead(vp, credp, ehdrp, &nshdrs, &shstrndx,
1378 1377              nphdrs)) != 0 ||
1379 1378              (error = getelfphdr(vp, credp, ehdrp, *nphdrs, phbasep,
1380 1379              phsizep)) != 0) {
1381 1380                  return (error);
1382 1381          }
1383 1382          return (0);
1384 1383  }
1385 1384  
1386 1385  
1387 1386  static int
1388 1387  mapelfexec(
1389 1388          vnode_t *vp,
1390 1389          Ehdr *ehdr,
1391 1390          int nphdrs,
1392 1391          caddr_t phdrbase,
1393 1392          Phdr **uphdr,
1394 1393          Phdr **dyphdr,
1395 1394          Phdr **stphdr,
1396 1395          Phdr **dtphdr,
1397 1396          Phdr *dataphdrp,
1398 1397          caddr_t *bssbase,
1399 1398          caddr_t *brkbase,
1400 1399          intptr_t *voffset,
1401 1400          intptr_t *minaddr,
1402 1401          size_t len,
1403 1402          long *execsz,
1404 1403          size_t *brksize)
1405 1404  {
1406 1405          Phdr *phdr;
1407 1406          int i, prot, error, lastprot = 0;
1408 1407          caddr_t addr = NULL;
1409 1408          size_t zfodsz;
1410 1409          int ptload = 0;
1411 1410          int page;
1412 1411          off_t offset;
1413 1412          int hsize = ehdr->e_phentsize;
1414 1413          caddr_t mintmp = (caddr_t)-1;
1415 1414          uintptr_t lastaddr = NULL;
1416 1415          extern int use_brk_lpg;
1417 1416  
1418 1417          if (ehdr->e_type == ET_DYN) {
1419 1418                  caddr_t vaddr;
1420 1419  
1421 1420                  /*
1422 1421                   * Despite the fact that mmapobj(2) refuses to load them, we
1423 1422                   * need to support executing ET_DYN objects that have a
1424 1423                   * non-NULL p_vaddr.  When found in the wild, these objects
1425 1424                   * are likely to be due to an old (and largely obviated) Linux
1426 1425                   * facility, prelink(8), that rewrites shared objects to
1427 1426                   * prefer specific (disjoint) virtual address ranges.  (Yes,
1428 1427                   * this is putatively for performance -- and yes, it has
1429 1428                   * limited applicability, many edge conditions and grisly
1430 1429                   * failure modes; even for Linux, it's insane.)  As ELF
1431 1430                   * mandates that the PT_LOAD segments be in p_vaddr order, we
1432 1431                   * find the lowest p_vaddr by finding the first PT_LOAD
1433 1432                   * segment.
1434 1433                   */
1435 1434                  phdr = (Phdr *)phdrbase;
1436 1435                  for (i = nphdrs; i > 0; i--) {
1437 1436                          if (phdr->p_type == PT_LOAD) {
1438 1437                                  addr = (caddr_t)(uintptr_t)phdr->p_vaddr;
1439 1438                                  break;
1440 1439                          }
1441 1440                          phdr = (Phdr *)((caddr_t)phdr + hsize);
1442 1441                  }
1443 1442  
1444 1443                  /*
1445 1444                   * We have a non-zero p_vaddr in the first PT_LOAD segment --
1446 1445                   * presumably because we're directly executing a prelink(8)'d
1447 1446                   * ld-linux.so.  While we could correctly execute such an
1448 1447                   * object without locating it at its desired p_vaddr (it is,
1449 1448                   * after all, still relocatable), our inner antiquarian
1450 1449                   * derives a perverse pleasure in accommodating the steampunk
1451 1450                   * prelink(8) contraption -- goggles on!
1452 1451                   */
1453 1452                  if ((vaddr = addr) != NULL) {
1454 1453                          if (as_gap(curproc->p_as, len,
1455 1454                              &addr, &len, AH_LO, NULL) == -1 || addr != vaddr) {
1456 1455                                  addr = NULL;
1457 1456                          }
1458 1457                  }
1459 1458  
1460 1459                  if (addr == NULL) {
1461 1460                          /*
1462 1461                           * We either have a NULL p_vaddr (the common case, by
1463 1462                           * many orders of magnitude) or we have a non-NULL
1464 1463                           * p_vaddr and we were unable to obtain the specified
1465 1464                           * VA range (presumably because it's an illegal
1466 1465                           * address).  Either way, obtain an address in which
1467 1466                           * to map the interpreter.
1468 1467                           */
1469 1468                          map_addr(&addr, len, (offset_t)0, 1, 0);
1470 1469                          if (addr == NULL)
1471 1470                                  return (ENOMEM);
1472 1471                  }
1473 1472  
1474 1473                  /*
1475 1474                   * Our voffset is the difference between where we landed and
1476 1475                   * where we wanted to be.
1477 1476                   */
1478 1477                  *voffset = (uintptr_t)addr - (uintptr_t)vaddr;
1479 1478          } else {
1480 1479                  *voffset = 0;
1481 1480          }
1482 1481  
1483 1482          phdr = (Phdr *)phdrbase;
1484 1483          for (i = nphdrs; i > 0; i--) {
1485 1484                  switch (phdr->p_type) {
1486 1485                  case PT_LOAD:
1487 1486                          ptload = 1;
1488 1487                          prot = PROT_USER;
1489 1488                          if (phdr->p_flags & PF_R)
1490 1489                                  prot |= PROT_READ;
1491 1490                          if (phdr->p_flags & PF_W)
1492 1491                                  prot |= PROT_WRITE;
1493 1492                          if (phdr->p_flags & PF_X)
1494 1493                                  prot |= PROT_EXEC;
1495 1494  
1496 1495                          addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
1497 1496  
1498 1497                          if ((*dyphdr != NULL) && uphdr != NULL &&
1499 1498                              (*uphdr == NULL)) {
1500 1499                                  /*
1501 1500                                   * The PT_PHDR program header is, strictly
1502 1501                                   * speaking, optional.  If we find that this
1503 1502                                   * is missing, we will determine the location
1504 1503                                   * of the program headers based on the address
1505 1504                                   * of the lowest PT_LOAD segment (namely, this
1506 1505                                   * one):  we subtract the p_offset to get to
1507 1506                                   * the ELF header and then add back the program
1508 1507                                   * header offset to get to the program headers.
1509 1508                                   * We then cons up a Phdr that corresponds to
1510 1509                                   * the (missing) PT_PHDR, setting the flags
1511 1510                                   * to 0 to denote that this is artificial and
1512 1511                                   * should (must) be freed by the caller.
1513 1512                                   */
1514 1513                                  Phdr *cons;
1515 1514  
1516 1515                                  cons = kmem_zalloc(sizeof (Phdr), KM_SLEEP);
1517 1516  
1518 1517                                  cons->p_flags = 0;
1519 1518                                  cons->p_type = PT_PHDR;
1520 1519                                  cons->p_vaddr = ((uintptr_t)addr -
1521 1520                                      phdr->p_offset) + ehdr->e_phoff;
1522 1521  
1523 1522                                  *uphdr = cons;
1524 1523                          }
1525 1524  
1526 1525                          /*
1527 1526                           * Keep track of the segment with the lowest starting
1528 1527                           * address.
1529 1528                           */
1530 1529                          if (addr < mintmp)
1531 1530                                  mintmp = addr;
1532 1531  
1533 1532                          /*
1534 1533                           * Segments need not correspond to page boundaries:
1535 1534                           * they are permitted to share a page.  If two PT_LOAD
1536 1535                           * segments share the same page, and the permissions
1537 1536                           * of the segments differ, the behavior is historically
1538 1537                           * that the permissions of the latter segment are used
1539 1538                           * for the page that the two segments share.  This is
1540 1539                           * also historically a non-issue:  binaries generated
1541 1540                           * by most anything will make sure that two PT_LOAD
1542 1541                           * segments with differing permissions don't actually
1543 1542                           * share any pages.  However, there exist some crazy
1544 1543                           * things out there (including at least an obscure
1545 1544                           * Portuguese teaching language called G-Portugol) that
1546 1545                           * actually do the wrong thing and expect it to work:
1547 1546                           * they have a segment with execute permission share
1548 1547                           * a page with a subsequent segment that does not
1549 1548                           * have execute permissions and expect the resulting
1550 1549                           * shared page to in fact be executable.  To accommodate
1551 1550                           * such broken link editors, we take advantage of a
1552 1551                           * latitude explicitly granted to the loader:  it is
1553 1552                           * permitted to make _any_ PT_LOAD segment executable
1554 1553                           * (provided that it is readable or writable).  If we
1555 1554                           * see that we're sharing a page and that the previous
1556 1555                           * page was executable, we will add execute permissions
1557 1556                           * to our segment.
1558 1557                           */
1559 1558                          if (btop(lastaddr) == btop((uintptr_t)addr) &&
1560 1559                              (phdr->p_flags & (PF_R | PF_W)) &&
1561 1560                              (lastprot & PROT_EXEC)) {
1562 1561                                  prot |= PROT_EXEC;
1563 1562                          }
1564 1563  
1565 1564                          lastaddr = (uintptr_t)addr + phdr->p_filesz;
1566 1565                          lastprot = prot;
1567 1566  
1568 1567                          zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
1569 1568  
1570 1569                          offset = phdr->p_offset;
1571 1570                          if (((uintptr_t)offset & PAGEOFFSET) ==
1572 1571                              ((uintptr_t)addr & PAGEOFFSET) &&
1573 1572                              (!(vp->v_flag & VNOMAP))) {
1574 1573                                  page = 1;
1575 1574                          } else {
1576 1575                                  page = 0;
1577 1576                          }
1578 1577  
1579 1578                          /*
1580 1579                           * Set the heap pagesize for OOB when the bss size
1581 1580                           * is known and use_brk_lpg is not 0.
1582 1581                           */
1583 1582                          if (brksize != NULL && use_brk_lpg &&
1584 1583                              zfodsz != 0 && phdr == dataphdrp &&
1585 1584                              (prot & PROT_WRITE)) {
1586 1585                                  size_t tlen = P2NPHASE((uintptr_t)addr +
1587 1586                                      phdr->p_filesz, PAGESIZE);
1588 1587  
1589 1588                                  if (zfodsz > tlen) {
1590 1589                                          curproc->p_brkpageszc =
1591 1590                                              page_szc(map_pgsz(MAPPGSZ_HEAP,
1592 1591                                              curproc, addr + phdr->p_filesz +
1593 1592                                              tlen, zfodsz - tlen, 0));
1594 1593                                  }
1595 1594                          }
1596 1595  
1597 1596                          if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
1598 1597                              (prot & PROT_WRITE)) {
1599 1598                                  uint_t  szc = curproc->p_brkpageszc;
1600 1599                                  size_t pgsz = page_get_pagesize(szc);
1601 1600                                  caddr_t ebss = addr + phdr->p_memsz;
1602 1601                                  size_t extra_zfodsz;
1603 1602  
1604 1603                                  ASSERT(pgsz > PAGESIZE);
1605 1604  
1606 1605                                  extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
1607 1606  
1608 1607                                  if (error = execmap(vp, addr, phdr->p_filesz,
1609 1608                                      zfodsz + extra_zfodsz, phdr->p_offset,
1610 1609                                      prot, page, szc))
1611 1610                                          goto bad;
1612 1611                                  if (brksize != NULL)
1613 1612                                          *brksize = extra_zfodsz;
1614 1613                          } else {
1615 1614                                  if (error = execmap(vp, addr, phdr->p_filesz,
1616 1615                                      zfodsz, phdr->p_offset, prot, page, 0))
1617 1616                                          goto bad;
1618 1617                          }
1619 1618  
1620 1619                          if (bssbase != NULL && addr >= *bssbase &&
1621 1620                              phdr == dataphdrp) {
1622 1621                                  *bssbase = addr + phdr->p_filesz;
1623 1622                          }
1624 1623                          if (brkbase != NULL && addr >= *brkbase) {
1625 1624                                  *brkbase = addr + phdr->p_memsz;
1626 1625                          }
1627 1626  
1628 1627                          *execsz += btopr(phdr->p_memsz);
1629 1628                          break;
1630 1629  
1631 1630                  case PT_INTERP:
1632 1631                          /*
1633 1632                           * The ELF specification is unequivocal about the
1634 1633                           * PT_INTERP program header with respect to any PT_LOAD
1635 1634                           * program header:  "If it is present, it must precede
1636 1635                           * any loadable segment entry." Linux, however, makes
1637 1636                           * no attempt to enforce this -- which has allowed some
1638 1637                           * binary editing tools to get away with generating
1639 1638                           * invalid ELF binaries in the respect that PT_INTERP
1640 1639                           * occurs after the first PT_LOAD program header.  This
1641 1640                           * is unfortunate (and of course, disappointing) but
1642 1641                           * it's no worse than that: there is no reason that we
1643 1642                           * can't process the PT_INTERP entry (if present) after
1644 1643                           * one or more PT_LOAD entries.  We therefore
1645 1644                           * deliberately do not check ptload here and always
1646 1645                           * store dyphdr to be the PT_INTERP program header.
1647 1646                           */
1648 1647                          *dyphdr = phdr;
1649 1648                          break;
1650 1649  
1651 1650                  case PT_SHLIB:
1652 1651                          *stphdr = phdr;
1653 1652                          break;
1654 1653  
1655 1654                  case PT_PHDR:
1656 1655                          if (ptload || phdr->p_flags == 0)
1657 1656                                  goto bad;
1658 1657  
1659 1658                          if (uphdr != NULL)
1660 1659                                  *uphdr = phdr;
1661 1660  
1662 1661                          break;
1663 1662  
1664 1663                  case PT_NULL:
1665 1664                  case PT_DYNAMIC:
1666 1665                  case PT_NOTE:
1667 1666                          break;
1668 1667  
1669 1668                  case PT_SUNWDTRACE:
1670 1669                          if (dtphdr != NULL)
1671 1670                                  *dtphdr = phdr;
1672 1671                          break;
1673 1672  
1674 1673                  default:
1675 1674                          break;
1676 1675                  }
1677 1676                  phdr = (Phdr *)((caddr_t)phdr + hsize);
1678 1677          }
1679 1678  
1680 1679          if (minaddr != NULL) {
1681 1680                  ASSERT(mintmp != (caddr_t)-1);
1682 1681                  *minaddr = (intptr_t)mintmp;
1683 1682          }
1684 1683  
1685 1684          return (0);
1686 1685  bad:
1687 1686          if (error == 0)
1688 1687                  error = EINVAL;
1689 1688          return (error);
1690 1689  }
1691 1690  
1692 1691  int
1693 1692  elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1694 1693      rlim64_t rlimit, cred_t *credp)
1695 1694  {
1696 1695          Note note;
1697 1696          int error;
1698 1697  
1699 1698          bzero(¬e, sizeof (note));
1700 1699          bcopy("CORE", note.name, 4);
1701 1700          note.nhdr.n_type = type;
1702 1701          /*
1703 1702           * The System V ABI states that n_namesz must be the length of the
1704 1703           * string that follows the Nhdr structure including the terminating
1705 1704           * null. The ABI also specifies that sufficient padding should be
1706 1705           * included so that the description that follows the name string
1707 1706           * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
1708 1707           * respectively. However, since this change was not made correctly
1709 1708           * at the time of the 64-bit port, both 32- and 64-bit binaries
1710 1709           * descriptions are only guaranteed to begin on a 4-byte boundary.
1711 1710           */
1712 1711          note.nhdr.n_namesz = 5;
1713 1712          note.nhdr.n_descsz = roundup(descsz, sizeof (Word));
1714 1713  
1715 1714          if (error = core_write(vp, UIO_SYSSPACE, *offsetp, ¬e,
1716 1715              sizeof (note), rlimit, credp))
1717 1716                  return (error);
1718 1717  
1719 1718          *offsetp += sizeof (note);
1720 1719  
1721 1720          if (error = core_write(vp, UIO_SYSSPACE, *offsetp, desc,
1722 1721              note.nhdr.n_descsz, rlimit, credp))
1723 1722                  return (error);
1724 1723  
1725 1724          *offsetp += note.nhdr.n_descsz;
1726 1725          return (0);
1727 1726  }
1728 1727  
1729 1728  /*
1730 1729   * Copy the section data from one vnode to the section of another vnode.
1731 1730   */
1732 1731  static void
1733 1732  copy_scn(Shdr *src, vnode_t *src_vp, Shdr *dst, vnode_t *dst_vp, Off *doffset,
1734 1733      void *buf, size_t size, cred_t *credp, rlim64_t rlimit)
1735 1734  {
1736 1735          ssize_t resid;
1737 1736          size_t len, n = src->sh_size;
1738 1737          offset_t off = 0;
1739 1738  
1740 1739          while (n != 0) {
1741 1740                  len = MIN(size, n);
1742 1741                  if (vn_rdwr(UIO_READ, src_vp, buf, len, src->sh_offset + off,
1743 1742                      UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid) != 0 ||
1744 1743                      resid >= len ||
1745 1744                      core_write(dst_vp, UIO_SYSSPACE, *doffset + off,
1746 1745                      buf, len - resid, rlimit, credp) != 0) {
1747 1746                          dst->sh_size = 0;
1748 1747                          dst->sh_offset = 0;
1749 1748                          return;
1750 1749                  }
1751 1750  
1752 1751                  ASSERT(n >= len - resid);
1753 1752  
1754 1753                  n -= len - resid;
1755 1754                  off += len - resid;
1756 1755          }
1757 1756  
1758 1757          *doffset += src->sh_size;
1759 1758  }
1760 1759  
/*
 * Upper bound, in bytes, on the staging buffer that process_scns() will
 * allocate for copying section data; the buffer is only grown for sections
 * no larger than this.  Defined once, in the build that does not define
 * _ELF32_COMPAT, and merely declared in the one that does.
 */
#ifndef _ELF32_COMPAT
size_t elf_datasz_max = 1 * 1024 * 1024;
#else
extern size_t elf_datasz_max;
#endif
1766 1765  
1767 1766  /*
1768 1767   * This function processes mappings that correspond to load objects to
1769 1768   * examine their respective sections for elfcore(). It's called once with
1770 1769   * v set to NULL to count the number of sections that we're going to need
1771 1770   * and then again with v set to some allocated buffer that we fill in with
1772 1771   * all the section data.
1773 1772   */
1774 1773  static int
1775 1774  process_scns(core_content_t content, proc_t *p, cred_t *credp, vnode_t *vp,
1776 1775      Shdr *v, int nv, rlim64_t rlimit, Off *doffsetp, int *nshdrsp)
1777 1776  {
1778 1777          vnode_t *lastvp = NULL;
1779 1778          struct seg *seg;
1780 1779          int i, j;
1781 1780          void *data = NULL;
1782 1781          size_t datasz = 0;
1783 1782          shstrtab_t shstrtab;
1784 1783          struct as *as = p->p_as;
1785 1784          int error = 0;
1786 1785  
1787 1786          if (v != NULL)
1788 1787                  shstrtab_init(&shstrtab);
1789 1788  
1790 1789          i = 1;
1791 1790          for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1792 1791                  uint_t prot;
1793 1792                  vnode_t *mvp;
1794 1793                  void *tmp = NULL;
1795 1794                  caddr_t saddr = seg->s_base;
1796 1795                  caddr_t naddr;
1797 1796                  caddr_t eaddr;
1798 1797                  size_t segsize;
1799 1798  
1800 1799                  Ehdr ehdr;
1801 1800                  int nshdrs, shstrndx, nphdrs;
1802 1801                  caddr_t shbase;
1803 1802                  ssize_t shsize;
1804 1803                  char *shstrbase;
1805 1804                  ssize_t shstrsize;
1806 1805  
1807 1806                  Shdr *shdr;
1808 1807                  const char *name;
1809 1808                  size_t sz;
1810 1809                  uintptr_t off;
1811 1810  
1812 1811                  int ctf_ndx = 0;
1813 1812                  int symtab_ndx = 0;
1814 1813  
1815 1814                  /*
1816 1815                   * Since we're just looking for text segments of load
1817 1816                   * objects, we only care about the protection bits; we don't
1818 1817                   * care about the actual size of the segment so we use the
1819 1818                   * reserved size. If the segment's size is zero, there's
1820 1819                   * something fishy going on so we ignore this segment.
1821 1820                   */
1822 1821                  if (seg->s_ops != &segvn_ops ||
1823 1822                      SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
1824 1823                      mvp == lastvp || mvp == NULL || mvp->v_type != VREG ||
1825 1824                      (segsize = pr_getsegsize(seg, 1)) == 0)
1826 1825                          continue;
1827 1826  
1828 1827                  eaddr = saddr + segsize;
1829 1828                  prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr);
1830 1829                  pr_getprot_done(&tmp);
1831 1830  
1832 1831                  /*
1833 1832                   * Skip this segment unless the protection bits look like
1834 1833                   * what we'd expect for a text segment.
1835 1834                   */
1836 1835                  if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)
1837 1836                          continue;
1838 1837  
1839 1838                  if (getelfhead(mvp, credp, &ehdr, &nshdrs, &shstrndx,
1840 1839                      &nphdrs) != 0 ||
1841 1840                      getelfshdr(mvp, credp, &ehdr, nshdrs, shstrndx,
1842 1841                      &shbase, &shsize, &shstrbase, &shstrsize) != 0)
1843 1842                          continue;
1844 1843  
1845 1844                  off = ehdr.e_shentsize;
1846 1845                  for (j = 1; j < nshdrs; j++, off += ehdr.e_shentsize) {
1847 1846                          Shdr *symtab = NULL, *strtab;
1848 1847  
1849 1848                          shdr = (Shdr *)(shbase + off);
1850 1849  
1851 1850                          if (shdr->sh_name >= shstrsize)
1852 1851                                  continue;
1853 1852  
1854 1853                          name = shstrbase + shdr->sh_name;
1855 1854  
1856 1855                          if (strcmp(name, shstrtab_data[STR_CTF]) == 0) {
1857 1856                                  if ((content & CC_CONTENT_CTF) == 0 ||
1858 1857                                      ctf_ndx != 0)
1859 1858                                          continue;
1860 1859  
1861 1860                                  if (shdr->sh_link > 0 &&
1862 1861                                      shdr->sh_link < nshdrs) {
1863 1862                                          symtab = (Shdr *)(shbase +
1864 1863                                              shdr->sh_link * ehdr.e_shentsize);
1865 1864                                  }
1866 1865  
1867 1866                                  if (v != NULL && i < nv - 1) {
1868 1867                                          if (shdr->sh_size > datasz &&
1869 1868                                              shdr->sh_size <= elf_datasz_max) {
1870 1869                                                  if (data != NULL)
1871 1870                                                          kmem_free(data, datasz);
1872 1871  
1873 1872                                                  datasz = shdr->sh_size;
1874 1873                                                  data = kmem_alloc(datasz,
1875 1874                                                      KM_SLEEP);
1876 1875                                          }
1877 1876  
1878 1877                                          v[i].sh_name = shstrtab_ndx(&shstrtab,
1879 1878                                              STR_CTF);
1880 1879                                          v[i].sh_addr = (Addr)(uintptr_t)saddr;
1881 1880                                          v[i].sh_type = SHT_PROGBITS;
1882 1881                                          v[i].sh_addralign = 4;
1883 1882                                          *doffsetp = roundup(*doffsetp,
1884 1883                                              v[i].sh_addralign);
1885 1884                                          v[i].sh_offset = *doffsetp;
1886 1885                                          v[i].sh_size = shdr->sh_size;
1887 1886                                          if (symtab == NULL)  {
1888 1887                                                  v[i].sh_link = 0;
1889 1888                                          } else if (symtab->sh_type ==
1890 1889                                              SHT_SYMTAB &&
1891 1890                                              symtab_ndx != 0) {
1892 1891                                                  v[i].sh_link =
1893 1892                                                      symtab_ndx;
1894 1893                                          } else {
1895 1894                                                  v[i].sh_link = i + 1;
1896 1895                                          }
1897 1896  
1898 1897                                          copy_scn(shdr, mvp, &v[i], vp,
1899 1898                                              doffsetp, data, datasz, credp,
1900 1899                                              rlimit);
1901 1900                                  }
1902 1901  
1903 1902                                  ctf_ndx = i++;
1904 1903  
1905 1904                                  /*
1906 1905                                   * We've already dumped the symtab.
1907 1906                                   */
1908 1907                                  if (symtab != NULL &&
1909 1908                                      symtab->sh_type == SHT_SYMTAB &&
1910 1909                                      symtab_ndx != 0)
1911 1910                                          continue;
1912 1911  
1913 1912                          } else if (strcmp(name,
1914 1913                              shstrtab_data[STR_SYMTAB]) == 0) {
1915 1914                                  if ((content & CC_CONTENT_SYMTAB) == 0 ||
1916 1915                                      symtab != 0)
1917 1916                                          continue;
1918 1917  
1919 1918                                  symtab = shdr;
1920 1919                          }
1921 1920  
1922 1921                          if (symtab != NULL) {
1923 1922                                  if ((symtab->sh_type != SHT_DYNSYM &&
1924 1923                                      symtab->sh_type != SHT_SYMTAB) ||
1925 1924                                      symtab->sh_link == 0 ||
1926 1925                                      symtab->sh_link >= nshdrs)
1927 1926                                          continue;
1928 1927  
1929 1928                                  strtab = (Shdr *)(shbase +
1930 1929                                      symtab->sh_link * ehdr.e_shentsize);
1931 1930  
1932 1931                                  if (strtab->sh_type != SHT_STRTAB)
1933 1932                                          continue;
1934 1933  
1935 1934                                  if (v != NULL && i < nv - 2) {
1936 1935                                          sz = MAX(symtab->sh_size,
1937 1936                                              strtab->sh_size);
1938 1937                                          if (sz > datasz &&
1939 1938                                              sz <= elf_datasz_max) {
1940 1939                                                  if (data != NULL)
1941 1940                                                          kmem_free(data, datasz);
1942 1941  
1943 1942                                                  datasz = sz;
1944 1943                                                  data = kmem_alloc(datasz,
1945 1944                                                      KM_SLEEP);
1946 1945                                          }
1947 1946  
1948 1947                                          if (symtab->sh_type == SHT_DYNSYM) {
1949 1948                                                  v[i].sh_name = shstrtab_ndx(
1950 1949                                                      &shstrtab, STR_DYNSYM);
1951 1950                                                  v[i + 1].sh_name = shstrtab_ndx(
1952 1951                                                      &shstrtab, STR_DYNSTR);
1953 1952                                          } else {
1954 1953                                                  v[i].sh_name = shstrtab_ndx(
1955 1954                                                      &shstrtab, STR_SYMTAB);
1956 1955                                                  v[i + 1].sh_name = shstrtab_ndx(
1957 1956                                                      &shstrtab, STR_STRTAB);
1958 1957                                          }
1959 1958  
1960 1959                                          v[i].sh_type = symtab->sh_type;
1961 1960                                          v[i].sh_addr = symtab->sh_addr;
1962 1961                                          if (ehdr.e_type == ET_DYN ||
1963 1962                                              v[i].sh_addr == 0)
1964 1963                                                  v[i].sh_addr +=
1965 1964                                                      (Addr)(uintptr_t)saddr;
1966 1965                                          v[i].sh_addralign =
1967 1966                                              symtab->sh_addralign;
1968 1967                                          *doffsetp = roundup(*doffsetp,
1969 1968                                              v[i].sh_addralign);
1970 1969                                          v[i].sh_offset = *doffsetp;
1971 1970                                          v[i].sh_size = symtab->sh_size;
1972 1971                                          v[i].sh_link = i + 1;
1973 1972                                          v[i].sh_entsize = symtab->sh_entsize;
1974 1973                                          v[i].sh_info = symtab->sh_info;
1975 1974  
1976 1975                                          copy_scn(symtab, mvp, &v[i], vp,
1977 1976                                              doffsetp, data, datasz, credp,
1978 1977                                              rlimit);
1979 1978  
1980 1979                                          v[i + 1].sh_type = SHT_STRTAB;
1981 1980                                          v[i + 1].sh_flags = SHF_STRINGS;
1982 1981                                          v[i + 1].sh_addr = symtab->sh_addr;
1983 1982                                          if (ehdr.e_type == ET_DYN ||
1984 1983                                              v[i + 1].sh_addr == 0)
1985 1984                                                  v[i + 1].sh_addr +=
1986 1985                                                      (Addr)(uintptr_t)saddr;
1987 1986                                          v[i + 1].sh_addralign =
1988 1987                                              strtab->sh_addralign;
1989 1988                                          *doffsetp = roundup(*doffsetp,
1990 1989                                              v[i + 1].sh_addralign);
1991 1990                                          v[i + 1].sh_offset = *doffsetp;
1992 1991                                          v[i + 1].sh_size = strtab->sh_size;
1993 1992  
1994 1993                                          copy_scn(strtab, mvp, &v[i + 1], vp,
1995 1994                                              doffsetp, data, datasz, credp,
1996 1995                                              rlimit);
1997 1996                                  }
1998 1997  
1999 1998                                  if (symtab->sh_type == SHT_SYMTAB)
2000 1999                                          symtab_ndx = i;
2001 2000                                  i += 2;
2002 2001                          }
2003 2002                  }
2004 2003  
2005 2004                  kmem_free(shstrbase, shstrsize);
2006 2005                  kmem_free(shbase, shsize);
2007 2006  
2008 2007                  lastvp = mvp;
2009 2008          }
2010 2009  
2011 2010          if (v == NULL) {
2012 2011                  if (i == 1)
2013 2012                          *nshdrsp = 0;
2014 2013                  else
2015 2014                          *nshdrsp = i + 1;
2016 2015                  goto done;
2017 2016          }
2018 2017  
2019 2018          if (i != nv - 1) {
2020 2019                  cmn_err(CE_WARN, "elfcore: core dump failed for "
2021 2020                      "process %d; address space is changing", p->p_pid);
2022 2021                  error = EIO;
2023 2022                  goto done;
2024 2023          }
2025 2024  
2026 2025          v[i].sh_name = shstrtab_ndx(&shstrtab, STR_SHSTRTAB);
2027 2026          v[i].sh_size = shstrtab_size(&shstrtab);
2028 2027          v[i].sh_addralign = 1;
2029 2028          *doffsetp = roundup(*doffsetp, v[i].sh_addralign);
2030 2029          v[i].sh_offset = *doffsetp;
2031 2030          v[i].sh_flags = SHF_STRINGS;
2032 2031          v[i].sh_type = SHT_STRTAB;
2033 2032  
2034 2033          if (v[i].sh_size > datasz) {
2035 2034                  if (data != NULL)
2036 2035                          kmem_free(data, datasz);
2037 2036  
2038 2037                  datasz = v[i].sh_size;
2039 2038                  data = kmem_alloc(datasz,
2040 2039                      KM_SLEEP);
2041 2040          }
2042 2041  
2043 2042          shstrtab_dump(&shstrtab, data);
2044 2043  
2045 2044          if ((error = core_write(vp, UIO_SYSSPACE, *doffsetp,
2046 2045              data, v[i].sh_size, rlimit, credp)) != 0)
2047 2046                  goto done;
2048 2047  
2049 2048          *doffsetp += v[i].sh_size;
2050 2049  
2051 2050  done:
2052 2051          if (data != NULL)
2053 2052                  kmem_free(data, datasz);
2054 2053  
2055 2054          return (error);
2056 2055  }
2057 2056  
2058 2057  int
2059 2058  elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig,
2060 2059      core_content_t content)
2061 2060  {
2062 2061          offset_t poffset, soffset;
2063 2062          Off doffset;
2064 2063          int error, i, nphdrs, nshdrs;
2065 2064          int overflow = 0;
2066 2065          struct seg *seg;
2067 2066          struct as *as = p->p_as;
2068 2067          union {
2069 2068                  Ehdr ehdr;
2070 2069                  Phdr phdr[1];
2071 2070                  Shdr shdr[1];
2072 2071          } *bigwad;
2073 2072          size_t bigsize;
2074 2073          size_t phdrsz, shdrsz;
2075 2074          Ehdr *ehdr;
2076 2075          Phdr *v;
2077 2076          caddr_t brkbase;
2078 2077          size_t brksize;
2079 2078          caddr_t stkbase;
2080 2079          size_t stksize;
2081 2080          int ntries = 0;
2082 2081          klwp_t *lwp = ttolwp(curthread);
2083 2082  
2084 2083  top:
2085 2084          /*
2086 2085           * Make sure we have everything we need (registers, etc.).
2087 2086           * All other lwps have already stopped and are in an orderly state.
2088 2087           */
2089 2088          ASSERT(p == ttoproc(curthread));
2090 2089          prstop(0, 0);
2091 2090  
2092 2091          AS_LOCK_ENTER(as, RW_WRITER);
2093 2092          nphdrs = prnsegs(as, 0) + 2;            /* two CORE note sections */
2094 2093  
2095 2094          /*
2096 2095           * Count the number of section headers we're going to need.
2097 2096           */
2098 2097          nshdrs = 0;
2099 2098          if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) {
2100 2099                  (void) process_scns(content, p, credp, NULL, NULL, NULL, 0,
2101 2100                      NULL, &nshdrs);
2102 2101          }
2103 2102          AS_LOCK_EXIT(as);
2104 2103  
2105 2104          ASSERT(nshdrs == 0 || nshdrs > 1);
2106 2105  
2107 2106          /*
2108 2107           * The core file contents may required zero section headers, but if
2109 2108           * we overflow the 16 bits allotted to the program header count in
2110 2109           * the ELF header, we'll need that program header at index zero.
2111 2110           */
2112 2111          if (nshdrs == 0 && nphdrs >= PN_XNUM)
2113 2112                  nshdrs = 1;
2114 2113  
2115 2114          phdrsz = nphdrs * sizeof (Phdr);
2116 2115          shdrsz = nshdrs * sizeof (Shdr);
2117 2116  
2118 2117          bigsize = MAX(sizeof (*bigwad), MAX(phdrsz, shdrsz));
2119 2118          bigwad = kmem_alloc(bigsize, KM_SLEEP);
2120 2119  
2121 2120          ehdr = &bigwad->ehdr;
2122 2121          bzero(ehdr, sizeof (*ehdr));
2123 2122  
2124 2123          ehdr->e_ident[EI_MAG0] = ELFMAG0;
2125 2124          ehdr->e_ident[EI_MAG1] = ELFMAG1;
2126 2125          ehdr->e_ident[EI_MAG2] = ELFMAG2;
2127 2126          ehdr->e_ident[EI_MAG3] = ELFMAG3;
2128 2127          ehdr->e_ident[EI_CLASS] = ELFCLASS;
2129 2128          ehdr->e_type = ET_CORE;
2130 2129  
2131 2130  #if !defined(_LP64) || defined(_ELF32_COMPAT)
2132 2131  
2133 2132  #if defined(__sparc)
2134 2133          ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
2135 2134          ehdr->e_machine = EM_SPARC;
2136 2135  #elif defined(__i386) || defined(__i386_COMPAT)
2137 2136          ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
2138 2137          ehdr->e_machine = EM_386;
2139 2138  #else
2140 2139  #error "no recognized machine type is defined"
2141 2140  #endif
2142 2141  
2143 2142  #else   /* !defined(_LP64) || defined(_ELF32_COMPAT) */
2144 2143  
2145 2144  #if defined(__sparc)
2146 2145          ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
2147 2146          ehdr->e_machine = EM_SPARCV9;
2148 2147  #elif defined(__amd64)
2149 2148          ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
2150 2149          ehdr->e_machine = EM_AMD64;
2151 2150  #else
2152 2151  #error "no recognized 64-bit machine type is defined"
2153 2152  #endif
2154 2153  
2155 2154  #endif  /* !defined(_LP64) || defined(_ELF32_COMPAT) */
2156 2155  
2157 2156          /*
2158 2157           * If the count of program headers or section headers or the index
2159 2158           * of the section string table can't fit in the mere 16 bits
2160 2159           * shortsightedly allotted to them in the ELF header, we use the
2161 2160           * extended formats and put the real values in the section header
2162 2161           * as index 0.
2163 2162           */
2164 2163          ehdr->e_version = EV_CURRENT;
2165 2164          ehdr->e_ehsize = sizeof (Ehdr);
2166 2165  
2167 2166          if (nphdrs >= PN_XNUM)
2168 2167                  ehdr->e_phnum = PN_XNUM;
2169 2168          else
2170 2169                  ehdr->e_phnum = (unsigned short)nphdrs;
2171 2170  
2172 2171          ehdr->e_phoff = sizeof (Ehdr);
2173 2172          ehdr->e_phentsize = sizeof (Phdr);
2174 2173  
2175 2174          if (nshdrs > 0) {
2176 2175                  if (nshdrs >= SHN_LORESERVE)
2177 2176                          ehdr->e_shnum = 0;
2178 2177                  else
2179 2178                          ehdr->e_shnum = (unsigned short)nshdrs;
2180 2179  
2181 2180                  if (nshdrs - 1 >= SHN_LORESERVE)
2182 2181                          ehdr->e_shstrndx = SHN_XINDEX;
2183 2182                  else
2184 2183                          ehdr->e_shstrndx = (unsigned short)(nshdrs - 1);
2185 2184  
2186 2185                  ehdr->e_shoff = ehdr->e_phoff + ehdr->e_phentsize * nphdrs;
2187 2186                  ehdr->e_shentsize = sizeof (Shdr);
2188 2187          }
2189 2188  
2190 2189          if (error = core_write(vp, UIO_SYSSPACE, (offset_t)0, ehdr,
2191 2190              sizeof (Ehdr), rlimit, credp))
2192 2191                  goto done;
2193 2192  
2194 2193          poffset = sizeof (Ehdr);
2195 2194          soffset = sizeof (Ehdr) + phdrsz;
2196 2195          doffset = sizeof (Ehdr) + phdrsz + shdrsz;
2197 2196  
2198 2197          v = &bigwad->phdr[0];
2199 2198          bzero(v, phdrsz);
2200 2199  
2201 2200          setup_old_note_header(&v[0], p);
2202 2201          v[0].p_offset = doffset = roundup(doffset, sizeof (Word));
2203 2202          doffset += v[0].p_filesz;
2204 2203  
2205 2204          setup_note_header(&v[1], p);
2206 2205          v[1].p_offset = doffset = roundup(doffset, sizeof (Word));
2207 2206          doffset += v[1].p_filesz;
2208 2207  
2209 2208          mutex_enter(&p->p_lock);
2210 2209  
2211 2210          brkbase = p->p_brkbase;
2212 2211          brksize = p->p_brksize;
2213 2212  
2214 2213          stkbase = p->p_usrstack - p->p_stksize;
2215 2214          stksize = p->p_stksize;
2216 2215  
2217 2216          mutex_exit(&p->p_lock);
2218 2217  
2219 2218          AS_LOCK_ENTER(as, RW_WRITER);
2220 2219          i = 2;
2221 2220          for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2222 2221                  caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2223 2222                  caddr_t saddr, naddr;
2224 2223                  void *tmp = NULL;
2225 2224                  extern struct seg_ops segspt_shmops;
2226 2225  
2227 2226                  for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2228 2227                          uint_t prot;
2229 2228                          size_t size;
2230 2229                          int type;
2231 2230                          vnode_t *mvp;
2232 2231  
2233 2232                          prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2234 2233                          prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
2235 2234                          if ((size = (size_t)(naddr - saddr)) == 0)
2236 2235                                  continue;
2237 2236                          if (i == nphdrs) {
2238 2237                                  overflow++;
2239 2238                                  continue;
2240 2239                          }
2241 2240                          v[i].p_type = PT_LOAD;
2242 2241                          v[i].p_vaddr = (Addr)(uintptr_t)saddr;
2243 2242                          v[i].p_memsz = size;
2244 2243                          if (prot & PROT_READ)
2245 2244                                  v[i].p_flags |= PF_R;
2246 2245                          if (prot & PROT_WRITE)
2247 2246                                  v[i].p_flags |= PF_W;
2248 2247                          if (prot & PROT_EXEC)
2249 2248                                  v[i].p_flags |= PF_X;
2250 2249  
2251 2250                          /*
2252 2251                           * Figure out which mappings to include in the core.
2253 2252                           */
2254 2253                          type = SEGOP_GETTYPE(seg, saddr);
2255 2254  
2256 2255                          if (saddr == stkbase && size == stksize) {
2257 2256                                  if (!(content & CC_CONTENT_STACK))
2258 2257                                          goto exclude;
2259 2258  
2260 2259                          } else if (saddr == brkbase && size == brksize) {
2261 2260                                  if (!(content & CC_CONTENT_HEAP))
2262 2261                                          goto exclude;
2263 2262  
2264 2263                          } else if (seg->s_ops == &segspt_shmops) {
2265 2264                                  if (type & MAP_NORESERVE) {
2266 2265                                          if (!(content & CC_CONTENT_DISM))
2267 2266                                                  goto exclude;
2268 2267                                  } else {
2269 2268                                          if (!(content & CC_CONTENT_ISM))
2270 2269                                                  goto exclude;
2271 2270                                  }
2272 2271  
2273 2272                          } else if (seg->s_ops != &segvn_ops) {
2274 2273                                  goto exclude;
2275 2274  
2276 2275                          } else if (type & MAP_SHARED) {
2277 2276                                  if (shmgetid(p, saddr) != SHMID_NONE) {
2278 2277                                          if (!(content & CC_CONTENT_SHM))
2279 2278                                                  goto exclude;
2280 2279  
2281 2280                                  } else if (SEGOP_GETVP(seg, seg->s_base,
2282 2281                                      &mvp) != 0 || mvp == NULL ||
2283 2282                                      mvp->v_type != VREG) {
2284 2283                                          if (!(content & CC_CONTENT_SHANON))
2285 2284                                                  goto exclude;
2286 2285  
2287 2286                                  } else {
2288 2287                                          if (!(content & CC_CONTENT_SHFILE))
2289 2288                                                  goto exclude;
2290 2289                                  }
2291 2290  
2292 2291                          } else if (SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
2293 2292                              mvp == NULL || mvp->v_type != VREG) {
2294 2293                                  if (!(content & CC_CONTENT_ANON))
2295 2294                                          goto exclude;
2296 2295  
2297 2296                          } else if (prot == (PROT_READ | PROT_EXEC)) {
2298 2297                                  if (!(content & CC_CONTENT_TEXT))
2299 2298                                          goto exclude;
2300 2299  
2301 2300                          } else if (prot == PROT_READ) {
2302 2301                                  if (!(content & CC_CONTENT_RODATA))
2303 2302                                          goto exclude;
2304 2303  
2305 2304                          } else {
2306 2305                                  if (!(content & CC_CONTENT_DATA))
2307 2306                                          goto exclude;
2308 2307                          }
2309 2308  
2310 2309                          doffset = roundup(doffset, sizeof (Word));
2311 2310                          v[i].p_offset = doffset;
2312 2311                          v[i].p_filesz = size;
2313 2312                          doffset += size;
2314 2313  exclude:
2315 2314                          i++;
2316 2315                  }
2317 2316                  ASSERT(tmp == NULL);
2318 2317          }
2319 2318          AS_LOCK_EXIT(as);
2320 2319  
2321 2320          if (overflow || i != nphdrs) {
2322 2321                  if (ntries++ == 0) {
2323 2322                          kmem_free(bigwad, bigsize);
2324 2323                          overflow = 0;
2325 2324                          goto top;
2326 2325                  }
2327 2326                  cmn_err(CE_WARN, "elfcore: core dump failed for "
2328 2327                      "process %d; address space is changing", p->p_pid);
2329 2328                  error = EIO;
2330 2329                  goto done;
2331 2330          }
2332 2331  
2333 2332          if ((error = core_write(vp, UIO_SYSSPACE, poffset,
2334 2333              v, phdrsz, rlimit, credp)) != 0)
2335 2334                  goto done;
2336 2335  
2337 2336          if ((error = write_old_elfnotes(p, sig, vp, v[0].p_offset, rlimit,
2338 2337              credp)) != 0)
2339 2338                  goto done;
2340 2339  
2341 2340          if ((error = write_elfnotes(p, sig, vp, v[1].p_offset, rlimit,
2342 2341              credp, content)) != 0)
2343 2342                  goto done;
2344 2343  
2345 2344          for (i = 2; i < nphdrs; i++) {
2346 2345                  prkillinfo_t killinfo;
2347 2346                  sigqueue_t *sq;
2348 2347                  int sig, j;
2349 2348  
2350 2349                  if (v[i].p_filesz == 0)
2351 2350                          continue;
2352 2351  
2353 2352                  /*
2354 2353                   * If dumping out this segment fails, rather than failing
2355 2354                   * the core dump entirely, we reset the size of the mapping
2356 2355                   * to zero to indicate that the data is absent from the core
2357 2356                   * file and or in the PF_SUNW_FAILURE flag to differentiate
2358 2357                   * this from mappings that were excluded due to the core file
2359 2358                   * content settings.
2360 2359                   */
2361 2360                  if ((error = core_seg(p, vp, v[i].p_offset,
2362 2361                      (caddr_t)(uintptr_t)v[i].p_vaddr, v[i].p_filesz,
2363 2362                      rlimit, credp)) == 0) {
2364 2363                          continue;
2365 2364                  }
2366 2365  
2367 2366                  if ((sig = lwp->lwp_cursig) == 0) {
2368 2367                          /*
2369 2368                           * We failed due to something other than a signal.
2370 2369                           * Since the space reserved for the segment is now
2371 2370                           * unused, we stash the errno in the first four
2372 2371                           * bytes. This undocumented interface will let us
2373 2372                           * understand the nature of the failure.
2374 2373                           */
2375 2374                          (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
2376 2375                              &error, sizeof (error), rlimit, credp);
2377 2376  
2378 2377                          v[i].p_filesz = 0;
2379 2378                          v[i].p_flags |= PF_SUNW_FAILURE;
2380 2379                          if ((error = core_write(vp, UIO_SYSSPACE,
2381 2380                              poffset + sizeof (v[i]) * i, &v[i], sizeof (v[i]),
2382 2381                              rlimit, credp)) != 0)
2383 2382                                  goto done;
2384 2383  
2385 2384                          continue;
2386 2385                  }
2387 2386  
2388 2387                  /*
2389 2388                   * We took a signal.  We want to abort the dump entirely, but
2390 2389                   * we also want to indicate what failed and why.  We therefore
2391 2390                   * use the space reserved for the first failing segment to
2392 2391                   * write our error (which, for purposes of compatability with
2393 2392                   * older core dump readers, we set to EINTR) followed by any
2394 2393                   * siginfo associated with the signal.
2395 2394                   */
2396 2395                  bzero(&killinfo, sizeof (killinfo));
2397 2396                  killinfo.prk_error = EINTR;
2398 2397  
2399 2398                  sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo;
2400 2399  
2401 2400                  if (sq != NULL) {
2402 2401                          bcopy(&sq->sq_info, &killinfo.prk_info,
2403 2402                              sizeof (sq->sq_info));
2404 2403                  } else {
2405 2404                          killinfo.prk_info.si_signo = lwp->lwp_cursig;
2406 2405                          killinfo.prk_info.si_code = SI_NOINFO;
2407 2406                  }
2408 2407  
2409 2408  #if (defined(_SYSCALL32_IMPL) || defined(_LP64))
2410 2409                  /*
2411 2410                   * If this is a 32-bit process, we need to translate from the
2412 2411                   * native siginfo to the 32-bit variant.  (Core readers must
2413 2412                   * always have the same data model as their target or must
2414 2413                   * be aware of -- and compensate for -- data model differences.)
2415 2414                   */
2416 2415                  if (curproc->p_model == DATAMODEL_ILP32) {
2417 2416                          siginfo32_t si32;
2418 2417  
2419 2418                          siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32);
2420 2419                          bcopy(&si32, &killinfo.prk_info, sizeof (si32));
2421 2420                  }
2422 2421  #endif
2423 2422  
2424 2423                  (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
2425 2424                      &killinfo, sizeof (killinfo), rlimit, credp);
2426 2425  
2427 2426                  /*
2428 2427                   * For the segment on which we took the signal, indicate that
2429 2428                   * its data now refers to a siginfo.
2430 2429                   */
2431 2430                  v[i].p_filesz = 0;
2432 2431                  v[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED |
2433 2432                      PF_SUNW_SIGINFO;
2434 2433  
2435 2434                  /*
2436 2435                   * And for every other segment, indicate that its absence
2437 2436                   * is due to a signal.
2438 2437                   */
2439 2438                  for (j = i + 1; j < nphdrs; j++) {
2440 2439                          v[j].p_filesz = 0;
2441 2440                          v[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED;
2442 2441                  }
2443 2442  
2444 2443                  /*
2445 2444                   * Finally, write out our modified program headers.
2446 2445                   */
2447 2446                  if ((error = core_write(vp, UIO_SYSSPACE,
2448 2447                      poffset + sizeof (v[i]) * i, &v[i],
2449 2448                      sizeof (v[i]) * (nphdrs - i), rlimit, credp)) != 0)
2450 2449                          goto done;
2451 2450  
2452 2451                  break;
2453 2452          }
2454 2453  
2455 2454          if (nshdrs > 0) {
2456 2455                  bzero(&bigwad->shdr[0], shdrsz);
2457 2456  
2458 2457                  if (nshdrs >= SHN_LORESERVE)
2459 2458                          bigwad->shdr[0].sh_size = nshdrs;
2460 2459  
2461 2460                  if (nshdrs - 1 >= SHN_LORESERVE)
2462 2461                          bigwad->shdr[0].sh_link = nshdrs - 1;
2463 2462  
2464 2463                  if (nphdrs >= PN_XNUM)
2465 2464                          bigwad->shdr[0].sh_info = nphdrs;
2466 2465  
2467 2466                  if (nshdrs > 1) {
2468 2467                          AS_LOCK_ENTER(as, RW_WRITER);
2469 2468                          if ((error = process_scns(content, p, credp, vp,
2470 2469                              &bigwad->shdr[0], nshdrs, rlimit, &doffset,
2471 2470                              NULL)) != 0) {
2472 2471                                  AS_LOCK_EXIT(as);
2473 2472                                  goto done;
2474 2473                          }
2475 2474                          AS_LOCK_EXIT(as);
2476 2475                  }
2477 2476  
2478 2477                  if ((error = core_write(vp, UIO_SYSSPACE, soffset,
2479 2478                      &bigwad->shdr[0], shdrsz, rlimit, credp)) != 0)
2480 2479                          goto done;
2481 2480          }
2482 2481  
2483 2482  done:
2484 2483          kmem_free(bigwad, bigsize);
2485 2484          return (error);
2486 2485  }
2487 2486  
2488 2487  #ifndef _ELF32_COMPAT
2489 2488  
2490 2489  static struct execsw esw = {
2491 2490  #ifdef  _LP64
2492 2491          elf64magicstr,
2493 2492  #else   /* _LP64 */
2494 2493          elf32magicstr,
2495 2494  #endif  /* _LP64 */
2496 2495          0,
2497 2496          5,
2498 2497          elfexec,
2499 2498          elfcore
2500 2499  };
2501 2500  
2502 2501  static struct modlexec modlexec = {
2503 2502          &mod_execops, "exec module for elf", &esw
2504 2503  };
2505 2504  
2506 2505  #ifdef  _LP64
2507 2506  extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
2508 2507                          intpdata_t *idatap, int level, long *execsz,
2509 2508                          int setid, caddr_t exec_file, cred_t *cred,
2510 2509                          int *brand_action);
2511 2510  extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
2512 2511                          rlim64_t rlimit, int sig, core_content_t content);
2513 2512  
2514 2513  static struct execsw esw32 = {
2515 2514          elf32magicstr,
2516 2515          0,
2517 2516          5,
2518 2517          elf32exec,
2519 2518          elf32core
2520 2519  };
2521 2520  
2522 2521  static struct modlexec modlexec32 = {
2523 2522          &mod_execops, "32-bit exec module for elf", &esw32
2524 2523  };
2525 2524  #endif  /* _LP64 */
2526 2525  
2527 2526  static struct modlinkage modlinkage = {
2528 2527          MODREV_1,
2529 2528          (void *)&modlexec,
2530 2529  #ifdef  _LP64
2531 2530          (void *)&modlexec32,
2532 2531  #endif  /* _LP64 */
2533 2532          NULL
2534 2533  };
2535 2534  
2536 2535  int
2537 2536  _init(void)
2538 2537  {
2539 2538          return (mod_install(&modlinkage));
2540 2539  }
2541 2540  
2542 2541  int
2543 2542  _fini(void)
2544 2543  {
2545 2544          return (mod_remove(&modlinkage));
2546 2545  }
2547 2546  
2548 2547  int
2549 2548  _info(struct modinfo *modinfop)
2550 2549  {
2551 2550          return (mod_info(&modlinkage, modinfop));
2552 2551  }
2553 2552  
2554 2553  #endif  /* !_ELF32_COMPAT */
  
    | 
      ↓ open down ↓ | 
    1655 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX