OS-5015 PT_INTERP headers should be permitted after PT_LOAD headers
OS-5451 comm page should not break i86xpv
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-5192 need faster clock_gettime
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
Reviewed by: Ryan Zezeski <ryan@zinascii.com>
OS-5293 lx brand: prelink(8)'d binaries core dump before main()
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-5072 lxbrand support PT_GNU_STACK
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-5202 Support AT_SECURE & AT_*ID in LX
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4824 Unlike Linux, nested interpreters don't work
(LX changes only, the rest were upstreamed...)
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Cody Mello <cody.mello@joyent.com>
OS-3735 modstubs MAXNARG is too low.
OS-3733 Verify b_native_exec exists before calling it
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4119 lxbrand panic when running native perl inside lx zone
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4128 programs that lack PT_PHDR are not properly loaded
OS-4141 freeing phdrs induces bad kmem_free() in elfexec()
backout OS-4141: needs more work
backout OS-4128: needs more work
OS-4141 freeing phdrs induces bad kmem_free() in elfexec()
OS-4128 programs that lack PT_PHDR are not properly loaded
OS-3696 lx brand: G-Portugol programs core dump
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3517 lx brand: branded zones don't interpret .interp section
OS-3405 lx brand: socket() fails for PF_INET6
OS-3382 lxbrand 64bit gettimeofday depends on vsyscall or vdso
OS-3280 need a way to specify the root of a native system in the lx brand
OS-3279 lx brand should allow delegated datasets
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-2949 add support for AT_RANDOM aux vector entry
OS-2877 lx_librtld_db fails to load due to NULL DT_DEBUG

*** 24,34 **** */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* ! * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ #include <sys/types.h> #include <sys/param.h> #include <sys/thread.h> --- 24,34 ---- */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* ! * Copyright 2016 Joyent, Inc. */ #include <sys/types.h> #include <sys/param.h> #include <sys/thread.h>
*** 64,73 **** --- 64,78 ---- #include <sys/brand.h> #include "elf_impl.h" #include <sys/sdt.h> #include <sys/siginfo.h> + #if defined(__x86) + #include <sys/comm_page_util.h> + #endif /* defined(__x86) */ + + extern int at_flags; #define ORIGIN_STR "ORIGIN" #define ORIGIN_STR_SIZE 6
*** 161,176 **** return (0); } /* ! * Map in the executable pointed to by vp. Returns 0 on success. */ int mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr, ! intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase, ! caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap) { size_t len; struct vattr vat; caddr_t phdrbase = NULL; ssize_t phdrsize; --- 166,185 ---- return (0); } /* ! * Map in the executable pointed to by vp. Returns 0 on success. Note that ! * this function currently has the maximum number of arguments allowed by ! * modstubs on x86 (MAXNARG)! Do _not_ add to this function signature without ! * adding to MAXNARG. (Better yet, do not add to this monster of a function ! * signature!) */ int mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr, ! intptr_t *voffset, caddr_t exec_file, char **interpp, caddr_t *bssbase, ! caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap, uintptr_t *minaddrp) { size_t len; struct vattr vat; caddr_t phdrbase = NULL; ssize_t phdrsize;
*** 178,194 **** --- 187,207 ---- int error = 0; Phdr *uphdr = NULL; Phdr *junk = NULL; Phdr *dynphdr = NULL; Phdr *dtrphdr = NULL; + char *interp = NULL; uintptr_t lddata; long execsz; intptr_t minaddr; if (lddatap != NULL) *lddatap = NULL; + if (minaddrp != NULL) + *minaddrp = NULL; + if (error = execpermissions(vp, &vat, args)) { uprintf("%s: Cannot execute %s\n", exec_file, args->pathname); return (error); }
*** 210,238 **** if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr, &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr, len, &execsz, brksize)) { uprintf("%s: Cannot map %s\n", exec_file, args->pathname); kmem_free(phdrbase, phdrsize); return (error); } /* ! * Inform our caller if the executable needs an interpreter. */ ! *interp = (dynphdr == NULL) ? 0 : 1; /* * If this is a statically linked executable, voffset should indicate * the address of the executable itself (it normally holds the address * of the interpreter). */ ! if (ehdr->e_type == ET_EXEC && *interp == 0) *voffset = minaddr; if (uphdr != NULL) { *uphdr_vaddr = uphdr->p_vaddr; } else { *uphdr_vaddr = (Addr)-1; } kmem_free(phdrbase, phdrsize); --- 223,315 ---- if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr, &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr, len, &execsz, brksize)) { uprintf("%s: Cannot map %s\n", exec_file, args->pathname); + if (uphdr != NULL && uphdr->p_flags == 0) + kmem_free(uphdr, sizeof (Phdr)); kmem_free(phdrbase, phdrsize); return (error); } + if (minaddrp != NULL) + *minaddrp = minaddr; + /* ! * If the executable requires an interpreter, determine its name. */ ! if (dynphdr != NULL) { ! ssize_t resid; + if (dynphdr->p_filesz > MAXPATHLEN || dynphdr->p_filesz == 0) { + uprintf("%s: Invalid interpreter\n", exec_file); + kmem_free(phdrbase, phdrsize); + return (ENOEXEC); + } + + interp = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + if ((error = vn_rdwr(UIO_READ, vp, interp, dynphdr->p_filesz, + (offset_t)dynphdr->p_offset, UIO_SYSSPACE, 0, + (rlim64_t)0, CRED(), &resid)) != 0 || resid != 0 || + interp[dynphdr->p_filesz - 1] != '\0') { + uprintf("%s: Cannot obtain interpreter pathname\n", + exec_file); + kmem_free(interp, MAXPATHLEN); + kmem_free(phdrbase, phdrsize); + return (error != 0 ? error : ENOEXEC); + } + } + /* * If this is a statically linked executable, voffset should indicate * the address of the executable itself (it normally holds the address * of the interpreter). */ ! if (ehdr->e_type == ET_EXEC && interp == NULL) *voffset = minaddr; + /* + * If the caller has asked for the interpreter name, return it (it's + * up to the caller to free it); if the caller hasn't asked for it, + * free it ourselves. + */ + if (interpp != NULL) { + *interpp = interp; + } else if (interp != NULL) { + kmem_free(interp, MAXPATHLEN); + } + if (uphdr != NULL) { *uphdr_vaddr = uphdr->p_vaddr; + + if (uphdr->p_flags == 0) + kmem_free(uphdr, sizeof (Phdr)); + } else if (ehdr->e_type == ET_DYN) { + /* + * If we don't have a uphdr, we'll apply the logic found + * in mapelfexec() and use the p_vaddr of the first PT_LOAD + * section as the base address of the object. + */ + Phdr *phdr = (Phdr *)phdrbase; + int i, hsize = ehdr->e_phentsize; + + for (i = nphdrs; i > 0; i--) { + if (phdr->p_type == PT_LOAD) { + *uphdr_vaddr = (uintptr_t)phdr->p_vaddr + + ehdr->e_phoff; + break; + } + + phdr = (Phdr *)((caddr_t)phdr + hsize); + } + + /* + * If we don't have a PT_LOAD segment, we should have returned + * ENOEXEC when elfsize() returned 0, above. + */ + VERIFY(i > 0); } else { *uphdr_vaddr = (Addr)-1; } kmem_free(phdrbase, phdrsize);
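Illustrative aside, not part of the change: the interpreter handling that mapexec_brand() now performs in the kernel (bounds-checking p_filesz against MAXPATHLEN, reading the PT_INTERP string, and insisting on NUL termination) can be sketched from userland. The following hypothetical program assumes a 64-bit ELF object and uses only pread(2):

#include <sys/types.h>
#include <elf.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char *argv[])
{
	Elf64_Ehdr ehdr;
	Elf64_Phdr phdr;
	char interp[PATH_MAX];
	int fd, i;

	if (argc != 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return (1);

	if (pread(fd, &ehdr, sizeof (ehdr), 0) != sizeof (ehdr))
		return (1);

	for (i = 0; i < ehdr.e_phnum; i++) {
		off_t off = ehdr.e_phoff + (off_t)i * ehdr.e_phentsize;

		if (pread(fd, &phdr, sizeof (phdr), off) != sizeof (phdr))
			return (1);
		if (phdr.p_type != PT_INTERP)
			continue;

		/* The same sanity checks the kernel applies above. */
		if (phdr.p_filesz == 0 || phdr.p_filesz > sizeof (interp))
			return (1);
		if (pread(fd, interp, phdr.p_filesz,
		    (off_t)phdr.p_offset) != (ssize_t)phdr.p_filesz ||
		    interp[phdr.p_filesz - 1] != '\0')
			return (1);

		(void) printf("%s\n", interp);
		return (0);
	}

	return (1);	/* no PT_INTERP: statically linked */
}

Note that the loop scans every program header regardless of where PT_INTERP falls relative to PT_LOAD, which matches the ordering tolerance adopted for OS-5015 further down.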
*** 241,257 **** /*ARGSUSED*/ int elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred, ! int brand_action) { caddr_t phdrbase = NULL; caddr_t bssbase = 0; caddr_t brkbase = 0; size_t brksize = 0; ! ssize_t dlnsize; aux_entry_t *aux; int error; ssize_t resid; int fd = -1; intptr_t voffset; --- 318,334 ---- /*ARGSUSED*/ int elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred, ! int *brand_action) { caddr_t phdrbase = NULL; caddr_t bssbase = 0; caddr_t brkbase = 0; size_t brksize = 0; ! ssize_t dlnsize, nsize = 0; aux_entry_t *aux; int error; ssize_t resid; int fd = -1; intptr_t voffset;
*** 271,280 **** --- 348,358 ---- ssize_t capsize; int hasu = 0; int hasauxv = 0; int hasdy = 0; int branded = 0; + int dynuphdr = 0; struct proc *p = ttoproc(curthread); struct user *up = PTOU(p); struct bigwad { Ehdr ehdr;
*** 325,335 **** --- 403,415 ---- if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) { args->to_model = DATAMODEL_ILP32; *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1); } else { args->to_model = DATAMODEL_LP64; + if (!args->stk_prot_override) { args->stk_prot &= ~PROT_EXEC; + } #if defined(__i386) || defined(__amd64) args->dat_prot &= ~PROT_EXEC; #endif *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1); }
*** 337,360 **** args->to_model = DATAMODEL_ILP32; *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1); #endif /* _LP64 */ /* ! * We delay invoking the brand callback until we've figured out ! * what kind of elf binary we're trying to run, 32-bit or 64-bit. ! * We do this because now the brand library can just check ! * args->to_model to see if the target is 32-bit or 64-bit without ! * having do duplicate all the code above. * * The level checks associated with brand handling below are used to * prevent a loop since the brand elfexec function typically comes back * through this function. We must check <= here since the nested * handling in the #! interpreter code will increment the level before * calling gexec to run the final elfexec interpreter. */ if ((level <= INTP_MAXDEPTH) && ! (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) { error = BROP(p)->b_elfexec(vp, uap, args, idatap, level + 1, execsz, setid, exec_file, cred, brand_action); goto out; } --- 417,466 ---- args->to_model = DATAMODEL_ILP32; *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1); #endif /* _LP64 */ /* ! * We delay invoking the brand callback until we've figured out what ! * kind of elf binary we're trying to run, 32-bit or 64-bit. We do this ! * because now the brand library can just check args->to_model to see if ! * the target is 32-bit or 64-bit without having do duplicate all the ! * code above. * + * We also give the brand a chance to indicate that based on the ELF + * OSABI of the target binary it should become unbranded and optionally + * indicate that it should be treated as existing in a specific prefix. + * + * Note that if a brand opts to go down this route it does not actually + * end up being debranded. In other words, future programs that exec + * will still be considered for branding unless this escape hatch is + * used. Consider the case of lx brand for example. If a user runs + * /native/usr/sbin/dtrace -c /bin/ls, the isaexec and normal executable + * of DTrace that's in /native will take this escape hatch and be run + * and interpreted using the normal system call table; however, the + * execution of a non-illumos binary in the form of /bin/ls will still + * be branded and be subject to all of the normal actions of the brand. + * * The level checks associated with brand handling below are used to * prevent a loop since the brand elfexec function typically comes back * through this function. We must check <= here since the nested * handling in the #! interpreter code will increment the level before * calling gexec to run the final elfexec interpreter. */ + if ((level <= INTP_MAXDEPTH) && (*brand_action != EBA_NATIVE) && + (PROC_IS_BRANDED(p)) && (BROP(p)->b_native_exec != NULL)) { + if (BROP(p)->b_native_exec(ehdrp->e_ident[EI_OSABI], + &args->brand_nroot) == B_TRUE) { + ASSERT(ehdrp->e_ident[EI_OSABI]); + *brand_action = EBA_NATIVE; + /* Add one for the trailing '/' in the path */ + if (args->brand_nroot != NULL) + nsize = strlen(args->brand_nroot) + 1; + } + } + if ((level <= INTP_MAXDEPTH) && ! (*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) { error = BROP(p)->b_elfexec(vp, uap, args, idatap, level + 1, execsz, setid, exec_file, cred, brand_action); goto out; }
*** 421,438 **** * entries are: * * AT_BASE * AT_FLAGS * AT_PAGESZ * AT_SUN_AUXFLAGS * AT_SUN_HWCAP * AT_SUN_HWCAP2 * AT_SUN_PLATFORM (added in stk_copyout) * AT_SUN_EXECNAME (added in stk_copyout) * AT_NULL * ! * total == 9 */ if (hasdy && hasu) { /* * Has PT_INTERP & PT_PHDR - the auxvectors that * will be built are: --- 527,545 ---- * entries are: * * AT_BASE * AT_FLAGS * AT_PAGESZ + * AT_RANDOM * AT_SUN_AUXFLAGS * AT_SUN_HWCAP * AT_SUN_HWCAP2 * AT_SUN_PLATFORM (added in stk_copyout) * AT_SUN_EXECNAME (added in stk_copyout) * AT_NULL * ! * total == 10 */ if (hasdy && hasu) { /* * Has PT_INTERP & PT_PHDR - the auxvectors that * will be built are:
*** 443,465 **** * AT_ENTRY * AT_LDDATA * * total = 5 */ ! args->auxsize = (9 + 5) * sizeof (aux_entry_t); } else if (hasdy) { /* * Has PT_INTERP but no PT_PHDR * * AT_EXECFD * AT_LDDATA * * total = 2 */ ! args->auxsize = (9 + 2) * sizeof (aux_entry_t); } else { ! args->auxsize = 9 * sizeof (aux_entry_t); } } else { args->auxsize = 0; } --- 550,572 ---- * AT_ENTRY * AT_LDDATA * * total = 5 */ ! args->auxsize = (10 + 5) * sizeof (aux_entry_t); } else if (hasdy) { /* * Has PT_INTERP but no PT_PHDR * * AT_EXECFD * AT_LDDATA * * total = 2 */ ! args->auxsize = (10 + 2) * sizeof (aux_entry_t); } else { ! args->auxsize = 10 * sizeof (aux_entry_t); } } else { args->auxsize = 0; }
*** 468,486 **** * AT_SUN_EMULATOR aux entry. */ if (args->emulator != NULL) args->auxsize += sizeof (aux_entry_t); - if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) { - branded = 1; /* ! * We will be adding 4 entries to the aux vectors. One for ! * the the brandname and 3 for the brand specific aux vectors. */ args->auxsize += 4 * sizeof (aux_entry_t); } /* Hardware/Software capabilities */ if (capphdr != NULL && (capsize = capphdr->p_filesz) > 0 && capsize <= 16 * sizeof (*cap)) { int ncaps = capsize / sizeof (*cap); --- 575,622 ---- * AT_SUN_EMULATOR aux entry. */ if (args->emulator != NULL) args->auxsize += sizeof (aux_entry_t); /* ! * If this is a native binary that's been given a modified interpreter ! * root, inform it that the native system exists at that root. */ + if (args->brand_nroot != NULL) { + args->auxsize += sizeof (aux_entry_t); + } + + + /* + * On supported kernels (x86_64) make room in the auxv for the + * AT_SUN_COMMPAGE entry. This will go unpopulated on i86xpv systems + * which do not provide such functionality. + */ + #if defined(__amd64) + args->auxsize += sizeof (aux_entry_t); + #endif /* defined(__amd64) */ + + /* + * If we have user credentials, we'll supply the following entries: + * AT_SUN_UID + * AT_SUN_RUID + * AT_SUN_GID + * AT_SUN_RGID + */ + if (cred != NULL) { args->auxsize += 4 * sizeof (aux_entry_t); } + if ((*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) { + branded = 1; + /* + * We will be adding 5 entries to the aux vectors. One for + * the the brandname and 4 for the brand specific aux vectors. + */ + args->auxsize += 5 * sizeof (aux_entry_t); + } + /* Hardware/Software capabilities */ if (capphdr != NULL && (capsize = capphdr->p_filesz) > 0 && capsize <= 16 * sizeof (*cap)) { int ncaps = capsize / sizeof (*cap);
*** 532,541 **** --- 668,685 ---- if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr, &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL, len, execsz, &brksize)) != 0) goto bad; + if (uphdr != NULL) { + /* + * Our uphdr has been dynamically allocated if (and only if) + * its program header flags are clear. + */ + dynuphdr = (uphdr->p_flags == 0); + } + if (uphdr != NULL && dyphdr == NULL) goto bad; if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) { uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
*** 546,566 **** size_t len; uintptr_t lddata; char *p; struct vnode *nvp; ! dlnsize = dyphdr->p_filesz; if (dlnsize > MAXPATHLEN || dlnsize <= 0) goto bad; /* * Read in "interpreter" pathname. */ ! if ((error = vn_rdwr(UIO_READ, vp, dlnp, dyphdr->p_filesz, ! (offset_t)dyphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0, ! CRED(), &resid)) != 0) { uprintf("%s: Cannot obtain interpreter pathname\n", exec_file); goto bad; } --- 690,715 ---- size_t len; uintptr_t lddata; char *p; struct vnode *nvp; ! dlnsize = dyphdr->p_filesz + nsize; if (dlnsize > MAXPATHLEN || dlnsize <= 0) goto bad; + if (nsize != 0) { + bcopy(args->brand_nroot, dlnp, nsize - 1); + dlnp[nsize - 1] = '/'; + } + /* * Read in "interpreter" pathname. */ ! if ((error = vn_rdwr(UIO_READ, vp, dlnp + nsize, ! dyphdr->p_filesz, (offset_t)dyphdr->p_offset, UIO_SYSSPACE, ! 0, (rlim64_t)0, CRED(), &resid)) != 0) { uprintf("%s: Cannot obtain interpreter pathname\n", exec_file); goto bad; }
*** 701,713 **** goto bad; } dtrphdr = NULL; ! error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk, &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len, execsz, NULL); if (error || junk != NULL) { VN_RELE(nvp); uprintf("%s: Cannot map %s\n", exec_file, dlnp); goto bad; } --- 850,863 ---- goto bad; } dtrphdr = NULL; ! error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, NULL, &junk, &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len, execsz, NULL); + if (error || junk != NULL) { VN_RELE(nvp); uprintf("%s: Cannot map %s\n", exec_file, dlnp); goto bad; }
*** 730,741 **** ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata) } if (hasauxv) { int auxf = AF_SUN_HWCAPVERIFY; /* ! * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via * exec_args() */ ADDAUX(aux, AT_BASE, voffset) ADDAUX(aux, AT_FLAGS, at_flags) ADDAUX(aux, AT_PAGESZ, PAGESIZE) --- 880,892 ---- ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata) } if (hasauxv) { int auxf = AF_SUN_HWCAPVERIFY; + /* ! * Note: AT_SUN_PLATFORM and AT_RANDOM were filled in via * exec_args() */ ADDAUX(aux, AT_BASE, voffset) ADDAUX(aux, AT_FLAGS, at_flags) ADDAUX(aux, AT_PAGESZ, PAGESIZE)
*** 760,770 **** * which we trust because we see they are already running * under pfexec (where uid != euid). This prevents a * malicious user within the zone from crafting a wrapper to * run native suid commands with unsecure libraries interposed. */ ! if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) && (setid &= ~EXECSETID_SETID) != 0)) auxf &= ~AF_SUN_SETUGID; /* * Record the user addr of the auxflags aux vector entry --- 911,921 ---- * which we trust because we see they are already running * under pfexec (where uid != euid). This prevents a * malicious user within the zone from crafting a wrapper to * run native suid commands with unsecure libraries interposed. */ ! if ((*brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) && (setid &= ~EXECSETID_SETID) != 0)) auxf &= ~AF_SUN_SETUGID; /* * Record the user addr of the auxflags aux vector entry
*** 773,783 **** --- 924,946 ---- args->auxp_auxflags = (char *)((char *)args->stackend + ((char *)&aux->a_type - (char *)bigwad->elfargs)); ADDAUX(aux, AT_SUN_AUXFLAGS, auxf); + /* + * Record information about the real and effective user and + * group IDs. + */ + if (cred != NULL) { + ADDAUX(aux, AT_SUN_UID, crgetuid(cred)); + ADDAUX(aux, AT_SUN_RUID, crgetruid(cred)); + ADDAUX(aux, AT_SUN_GID, crgetgid(cred)); + ADDAUX(aux, AT_SUN_RGID, crgetrgid(cred)); + } + + /* * Hardware capability flag word (performance hints) * Used for choosing faster library routines. * (Potentially different between 32-bit and 64-bit ABIs) */ #if defined(_LP64)
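Illustrative aside, not part of the change: the four credential entries join the existing AT_SUN_* entries in the process aux vector. A hypothetical userland sketch of how a consumer could locate one of them, assuming the auxv sits immediately past the environment on the initial stack (true of illumos and Linux process startup) and taking auxv_t and AT_SUN_UID from <sys/auxv.h>:

#include <stdio.h>
#include <sys/auxv.h>

extern char **environ;

/* Walk past the environment strings to the aux vector and find a tag. */
static long
auxv_lookup(int type)
{
	char **p = environ;
	auxv_t *av;

	while (*p != NULL)
		p++;

	for (av = (auxv_t *)(p + 1); av->a_type != AT_NULL; av++) {
		if (av->a_type == type)
			return ((long)av->a_un.a_val);
	}

	return (-1);
}

int
main(void)
{
	(void) printf("AT_SUN_UID = %ld\n", auxv_lookup(AT_SUN_UID));
	return (0);
}

This works only before anything modifies environ; a robust consumer would read the auxv some other way (for example, from a pointer saved at process startup).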
*** 802,814 **** --- 965,994 ---- ((char *)&aux->a_type - (char *)bigwad->elfargs)); ADDAUX(aux, AT_SUN_BRAND_AUX1, 0) ADDAUX(aux, AT_SUN_BRAND_AUX2, 0) ADDAUX(aux, AT_SUN_BRAND_AUX3, 0) + ADDAUX(aux, AT_SUN_BRAND_AUX4, 0) } + /* + * Add the comm page auxv entry, mapping it in if needed. + */ + #if defined(__amd64) + if (args->commpage != NULL || + (args->commpage = (uintptr_t)comm_page_mapin()) != NULL) { + ADDAUX(aux, AT_SUN_COMMPAGE, args->commpage) + } else { + /* + * If the comm page cannot be mapped, pad out the auxv + * to satisfy later size checks. + */ ADDAUX(aux, AT_NULL, 0) + } + #endif /* defined(__amd64) */ + + ADDAUX(aux, AT_NULL, 0) postfixsize = (char *)aux - (char *)bigwad->elfargs; /* * We make assumptions above when we determine how many aux * vector entries we will be adding. However, if we have an
*** 843,852 **** --- 1023,1033 ---- error = ENOMEM; goto bad; } bzero(up->u_auxv, sizeof (up->u_auxv)); + up->u_commpagep = args->commpage; if (postfixsize) { int num_auxv; /* * Copy the aux vector to the user stack.
*** 909,918 **** --- 1090,1101 ---- psignal(p, SIGKILL); if (error == 0) error = ENOEXEC; out: + if (dynuphdr) + kmem_free(uphdr, sizeof (Phdr)); if (phdrbase != NULL) kmem_free(phdrbase, phdrsize); if (cap != NULL) kmem_free(cap, capsize); kmem_free(bigwad, sizeof (struct bigwad));
*** 1175,1184 **** --- 1358,1390 ---- (*shstrbasep)[*shstrsizep - 1] = '\0'; return (0); } + + #ifdef _ELF32_COMPAT + int + elf32readhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs, + caddr_t *phbasep, ssize_t *phsizep) + #else + int + elfreadhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs, + caddr_t *phbasep, ssize_t *phsizep) + #endif + { + int error, nshdrs, shstrndx; + + if ((error = getelfhead(vp, credp, ehdrp, &nshdrs, &shstrndx, + nphdrs)) != 0 || + (error = getelfphdr(vp, credp, ehdrp, *nphdrs, phbasep, + phsizep)) != 0) { + return (error); + } + return (0); + } + + static int mapelfexec( vnode_t *vp, Ehdr *ehdr, int nphdrs,
*** 1195,1249 **** size_t len, long *execsz, size_t *brksize) { Phdr *phdr; ! int i, prot, error; caddr_t addr = NULL; size_t zfodsz; int ptload = 0; int page; off_t offset; int hsize = ehdr->e_phentsize; caddr_t mintmp = (caddr_t)-1; extern int use_brk_lpg; if (ehdr->e_type == ET_DYN) { ! /* ! * Obtain the virtual address of a hole in the ! * address space to map the "interpreter". ! */ ! map_addr(&addr, len, (offset_t)0, 1, 0); ! if (addr == NULL) ! return (ENOMEM); ! *voffset = (intptr_t)addr; /* ! * Calculate the minimum vaddr so it can be subtracted out. ! * According to the ELF specification, since PT_LOAD sections ! * must be sorted by increasing p_vaddr values, this is ! * guaranteed to be the first PT_LOAD section. */ phdr = (Phdr *)phdrbase; for (i = nphdrs; i > 0; i--) { if (phdr->p_type == PT_LOAD) { ! *voffset -= (uintptr_t)phdr->p_vaddr; break; } phdr = (Phdr *)((caddr_t)phdr + hsize); } } else { *voffset = 0; } phdr = (Phdr *)phdrbase; for (i = nphdrs; i > 0; i--) { switch (phdr->p_type) { case PT_LOAD: - if ((*dyphdr != NULL) && (*uphdr == NULL)) - return (0); - ptload = 1; prot = PROT_USER; if (phdr->p_flags & PF_R) prot |= PROT_READ; if (phdr->p_flags & PF_W) --- 1401,1490 ---- size_t len, long *execsz, size_t *brksize) { Phdr *phdr; ! int i, prot, error, lastprot = 0; caddr_t addr = NULL; size_t zfodsz; int ptload = 0; int page; off_t offset; int hsize = ehdr->e_phentsize; caddr_t mintmp = (caddr_t)-1; + uintptr_t lastaddr = NULL; extern int use_brk_lpg; if (ehdr->e_type == ET_DYN) { ! caddr_t vaddr; /* ! * Despite the fact that mmapobj(2) refuses to load them, we ! * need to support executing ET_DYN objects that have a ! * non-NULL p_vaddr. When found in the wild, these objects ! * are likely to be due to an old (and largely obviated) Linux ! * facility, prelink(8), that rewrites shared objects to ! * prefer specific (disjoint) virtual address ranges. (Yes, ! * this is putatively for performance -- and yes, it has ! * limited applicability, many edge conditions and grisly ! * failure modes; even for Linux, it's insane.) As ELF ! * mandates that the PT_LOAD segments be in p_vaddr order, we ! * find the lowest p_vaddr by finding the first PT_LOAD ! * segment. */ phdr = (Phdr *)phdrbase; for (i = nphdrs; i > 0; i--) { if (phdr->p_type == PT_LOAD) { ! addr = (caddr_t)(uintptr_t)phdr->p_vaddr; break; } phdr = (Phdr *)((caddr_t)phdr + hsize); } + /* + * We have a non-zero p_vaddr in the first PT_LOAD segment -- + * presumably because we're directly executing a prelink(8)'d + * ld-linux.so. While we could correctly execute such an + * object without locating it at its desired p_vaddr (it is, + * after all, still relocatable), our inner antiquarian + * derives a perverse pleasure in accommodating the steampunk + * prelink(8) contraption -- goggles on! + */ + if ((vaddr = addr) != NULL) { + if (as_gap(curproc->p_as, len, + &addr, &len, AH_LO, NULL) == -1 || addr != vaddr) { + addr = NULL; + } + } + + if (addr == NULL) { + /* + * We either have a NULL p_vaddr (the common case, by + * many orders of magnitude) or we have a non-NULL + * p_vaddr and we were unable to obtain the specified + * VA range (presumably because it's an illegal + * address). Either way, obtain an address in which + * to map the interpreter. + */ + map_addr(&addr, len, (offset_t)0, 1, 0); + if (addr == NULL) + return (ENOMEM); + } + + /* + * Our voffset is the difference between where we landed and + * where we wanted to be. 
+ */ + *voffset = (uintptr_t)addr - (uintptr_t)vaddr; } else { *voffset = 0; } + phdr = (Phdr *)phdrbase; for (i = nphdrs; i > 0; i--) { switch (phdr->p_type) { case PT_LOAD: ptload = 1; prot = PROT_USER; if (phdr->p_flags & PF_R) prot |= PROT_READ; if (phdr->p_flags & PF_W)
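Illustrative aside, not part of the change: the voffset computed above is the familiar load bias of dynamic linking. Every p_vaddr in an ET_DYN object is adjusted by the difference between where the object actually landed and the p_vaddr of its first PT_LOAD segment, which is zero for ordinary shared objects and non-zero for prelink(8)'d ones. A hypothetical standalone helper, assuming a 64-bit ELF, showing the same arithmetic:

#include <sys/types.h>
#include <elf.h>
#include <stdint.h>

/*
 * Illustrative only: given where the first PT_LOAD segment was actually
 * mapped, compute the bias to add to every p_vaddr (and to e_entry) in
 * the object.
 */
uintptr_t
load_bias(uintptr_t mapped_base, const Elf64_Phdr *phdr, size_t nphdrs)
{
	size_t i;

	for (i = 0; i < nphdrs; i++) {
		if (phdr[i].p_type == PT_LOAD)
			return (mapped_base - (uintptr_t)phdr[i].p_vaddr);
	}

	return (0);	/* no PT_LOAD segment: nothing was mapped */
}

When the kernel is able to honor a prelink(8)'d object's requested range (the as_gap() check above), mapped_base equals the first p_vaddr and the bias collapses to zero.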
*** 1251,1267 **** --- 1492,1571 ---- if (phdr->p_flags & PF_X) prot |= PROT_EXEC; addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset); + if ((*dyphdr != NULL) && uphdr != NULL && + (*uphdr == NULL)) { /* + * The PT_PHDR program header is, strictly + * speaking, optional. If we find that this + * is missing, we will determine the location + * of the program headers based on the address + * of the lowest PT_LOAD segment (namely, this + * one): we subtract the p_offset to get to + * the ELF header and then add back the program + * header offset to get to the program headers. + * We then cons up a Phdr that corresponds to + * the (missing) PT_PHDR, setting the flags + * to 0 to denote that this is artificial and + * should (must) be freed by the caller. + */ + Phdr *cons; + + cons = kmem_zalloc(sizeof (Phdr), KM_SLEEP); + + cons->p_flags = 0; + cons->p_type = PT_PHDR; + cons->p_vaddr = ((uintptr_t)addr - + phdr->p_offset) + ehdr->e_phoff; + + *uphdr = cons; + } + + /* * Keep track of the segment with the lowest starting * address. */ if (addr < mintmp) mintmp = addr; + /* + * Segments need not correspond to page boundaries: + * they are permitted to share a page. If two PT_LOAD + * segments share the same page, and the permissions + * of the segments differ, the behavior is historically + * that the permissions of the latter segment are used + * for the page that the two segments share. This is + * also historically a non-issue: binaries generated + * by most anything will make sure that two PT_LOAD + * segments with differing permissions don't actually + * share any pages. However, there exist some crazy + * things out there (including at least an obscure + * Portuguese teaching language called G-Portugol) that + * actually do the wrong thing and expect it to work: + * they have a segment with execute permission share + * a page with a subsequent segment that does not + * have execute permissions and expect the resulting + * shared page to in fact be executable. To accommodate + * such broken link editors, we take advantage of a + * latitude explicitly granted to the loader: it is + * permitted to make _any_ PT_LOAD segment executable + * (provided that it is readable or writable). If we + * see that we're sharing a page and that the previous + * page was executable, we will add execute permissions + * to our segment. + */ + if (btop(lastaddr) == btop((uintptr_t)addr) && + (phdr->p_flags & (PF_R | PF_W)) && + (lastprot & PROT_EXEC)) { + prot |= PROT_EXEC; + } + + lastaddr = (uintptr_t)addr + phdr->p_filesz; + lastprot = prot; + zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz; offset = phdr->p_offset; if (((uintptr_t)offset & PAGEOFFSET) == ((uintptr_t)addr & PAGEOFFSET) &&
*** 1322,1344 **** *execsz += btopr(phdr->p_memsz); break; case PT_INTERP: ! if (ptload) ! goto bad; *dyphdr = phdr; break; case PT_SHLIB: *stphdr = phdr; break; case PT_PHDR: ! if (ptload) goto bad; *uphdr = phdr; break; case PT_NULL: case PT_DYNAMIC: case PT_NOTE: --- 1626,1665 ---- *execsz += btopr(phdr->p_memsz); break; case PT_INTERP: ! /* ! * The ELF specification is unequivocal about the ! * PT_INTERP program header with respect to any PT_LOAD ! * program header: "If it is present, it must precede ! * any loadable segment entry." Linux, however, makes ! * no attempt to enforce this -- which has allowed some ! * binary editing tools to get away with generating ! * invalid ELF binaries in the respect that PT_INTERP ! * occurs after the first PT_LOAD program header. This ! * is unfortunate (and of course, disappointing) but ! * it's no worse than that: there is no reason that we ! * can't process the PT_INTERP entry (if present) after ! * one or more PT_LOAD entries. We therefore ! * deliberately do not check ptload here and always ! * store dyphdr to be the PT_INTERP program header. ! */ *dyphdr = phdr; break; case PT_SHLIB: *stphdr = phdr; break; case PT_PHDR: ! if (ptload || phdr->p_flags == 0) goto bad; + + if (uphdr != NULL) *uphdr = phdr; + break; case PT_NULL: case PT_DYNAMIC: case PT_NOTE:
*** 2183,2193 **** #ifdef _LP64 extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred, ! int brand_action); extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig, core_content_t content); static struct execsw esw32 = { elf32magicstr, --- 2504,2514 ---- #ifdef _LP64 extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred, ! int *brand_action); extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig, core_content_t content); static struct execsw esw32 = { elf32magicstr,