Print this page
OS-5015 PT_INTERP headers should be permitted after PT_LOAD headers
OS-5451 comm page should not break i86xpv
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-5192 need faster clock_gettime
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
Reviewed by: Ryan Zezeski <ryan@zinascii.com>
OS-5293 lx brand: prelink(8)'d binaries core dump before main()
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-5072 lxbrand support PT_GNU_STACK
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-5202 Support AT_SECURE & AT_*ID in LX
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4824 Unlike Linux, nested interpreters don't work
(LX changes only, the rest were upstreamed...)
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Cody Mello <cody.mello@joyent.com>
OS-3735 modstubs MAXNARG is too low.
OS-3733 Verify b_native_exec exists before calling it
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4119 lxbrand panic when running native perl inside lx zone
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4128 programs that lack PT_PHDR are not properly loaded
OS-4141 freeing phdrs induces bad kmem_free() in elfexec()
backout OS-4141: needs more work
backout OS-4128: needs more work
OS-4141 freeing phdrs induces bad kmem_free() in elfexec()
OS-4128 programs that lack PT_PHDR are not properly loaded
OS-3696 lx brand: G-Portugol programs core dump
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3517 lx brand: branded zones don't interpret .interp section
OS-3405 lx brand: socket() fails for PF_INET6
OS-3382 lxbrand 64bit gettimeofday depends on vsyscall or vdso
OS-3280 need a way to specify the root of a native system in the lx brand
OS-3279 lx brand should allow delegated datasets
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-2949 add support for AT_RANDOM aux vector entry
OS-2877 lx_librtld_db fails to load due to NULL DT_DEBUG
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/exec/elf/elf.c
+++ new/usr/src/uts/common/exec/elf/elf.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
|
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28 /*
29 - * Copyright (c) 2013, Joyent, Inc. All rights reserved.
29 + * Copyright 2016 Joyent, Inc.
30 30 */
31 31
32 32 #include <sys/types.h>
33 33 #include <sys/param.h>
34 34 #include <sys/thread.h>
35 35 #include <sys/sysmacros.h>
36 36 #include <sys/signal.h>
37 37 #include <sys/cred.h>
38 38 #include <sys/user.h>
39 39 #include <sys/errno.h>
40 40 #include <sys/vnode.h>
41 41 #include <sys/mman.h>
42 42 #include <sys/kmem.h>
43 43 #include <sys/proc.h>
44 44 #include <sys/pathname.h>
45 45 #include <sys/cmn_err.h>
46 46 #include <sys/systm.h>
47 47 #include <sys/elf.h>
48 48 #include <sys/vmsystm.h>
49 49 #include <sys/debug.h>
50 50 #include <sys/auxv.h>
51 51 #include <sys/exec.h>
52 52 #include <sys/prsystm.h>
53 53 #include <vm/as.h>
54 54 #include <vm/rm.h>
55 55 #include <vm/seg.h>
56 56 #include <vm/seg_vn.h>
57 57 #include <sys/modctl.h>
58 58 #include <sys/systeminfo.h>
|
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
59 59 #include <sys/vmparam.h>
60 60 #include <sys/machelf.h>
61 61 #include <sys/shm_impl.h>
62 62 #include <sys/archsystm.h>
63 63 #include <sys/fasttrap.h>
64 64 #include <sys/brand.h>
65 65 #include "elf_impl.h"
66 66 #include <sys/sdt.h>
67 67 #include <sys/siginfo.h>
68 68
69 +#if defined(__x86)
70 +#include <sys/comm_page_util.h>
71 +#endif /* defined(__x86) */
72 +
73 +
69 74 extern int at_flags;
70 75
71 76 #define ORIGIN_STR "ORIGIN"
72 77 #define ORIGIN_STR_SIZE 6
73 78
74 79 static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
75 80 static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
76 81 ssize_t *);
77 82 static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
78 83 ssize_t *, caddr_t *, ssize_t *);
79 84 static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
80 85 static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
81 86 Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
82 87 caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
83 88
84 89 typedef enum {
85 90 STR_CTF,
86 91 STR_SYMTAB,
87 92 STR_DYNSYM,
88 93 STR_STRTAB,
89 94 STR_DYNSTR,
90 95 STR_SHSTRTAB,
91 96 STR_NUM
92 97 } shstrtype_t;
93 98
94 99 static const char *shstrtab_data[] = {
95 100 ".SUNW_ctf",
96 101 ".symtab",
97 102 ".dynsym",
98 103 ".strtab",
99 104 ".dynstr",
100 105 ".shstrtab"
101 106 };
102 107
103 108 typedef struct shstrtab {
104 109 int sst_ndx[STR_NUM];
105 110 int sst_cur;
106 111 } shstrtab_t;
107 112
108 113 static void
109 114 shstrtab_init(shstrtab_t *s)
110 115 {
111 116 bzero(&s->sst_ndx, sizeof (s->sst_ndx));
112 117 s->sst_cur = 1;
113 118 }
114 119
115 120 static int
116 121 shstrtab_ndx(shstrtab_t *s, shstrtype_t type)
117 122 {
118 123 int ret;
119 124
120 125 if ((ret = s->sst_ndx[type]) != 0)
121 126 return (ret);
122 127
123 128 ret = s->sst_ndx[type] = s->sst_cur;
124 129 s->sst_cur += strlen(shstrtab_data[type]) + 1;
125 130
126 131 return (ret);
127 132 }
128 133
129 134 static size_t
130 135 shstrtab_size(const shstrtab_t *s)
131 136 {
132 137 return (s->sst_cur);
133 138 }
134 139
135 140 static void
136 141 shstrtab_dump(const shstrtab_t *s, char *buf)
137 142 {
138 143 int i, ndx;
139 144
140 145 *buf = '\0';
141 146 for (i = 0; i < STR_NUM; i++) {
142 147 if ((ndx = s->sst_ndx[i]) != 0)
143 148 (void) strcpy(buf + ndx, shstrtab_data[i]);
144 149 }
145 150 }
146 151
147 152 static int
148 153 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
149 154 {
150 155 ASSERT(phdrp->p_type == PT_SUNWDTRACE);
151 156
152 157 /*
153 158 * See the comment in fasttrap.h for information on how to safely
154 159 * update this program header.
155 160 */
|
↓ open down ↓ |
77 lines elided |
↑ open up ↑ |
156 161 if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
157 162 (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
158 163 return (-1);
159 164
160 165 args->thrptr = phdrp->p_vaddr + base;
161 166
162 167 return (0);
163 168 }
164 169
165 170 /*
166 - * Map in the executable pointed to by vp. Returns 0 on success.
171 + * Map in the executable pointed to by vp. Returns 0 on success. Note that
172 + * this function currently has the maximum number of arguments allowed by
173 + * modstubs on x86 (MAXNARG)! Do _not_ add to this function signature without
174 + * adding to MAXNARG. (Better yet, do not add to this monster of a function
175 + * signature!)
167 176 */
168 177 int
169 178 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
170 - intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
171 - caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
179 + intptr_t *voffset, caddr_t exec_file, char **interpp, caddr_t *bssbase,
180 + caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap, uintptr_t *minaddrp)
172 181 {
173 182 size_t len;
174 183 struct vattr vat;
175 184 caddr_t phdrbase = NULL;
176 185 ssize_t phdrsize;
177 186 int nshdrs, shstrndx, nphdrs;
178 187 int error = 0;
179 188 Phdr *uphdr = NULL;
180 189 Phdr *junk = NULL;
181 190 Phdr *dynphdr = NULL;
182 191 Phdr *dtrphdr = NULL;
192 + char *interp = NULL;
183 193 uintptr_t lddata;
184 194 long execsz;
185 195 intptr_t minaddr;
186 196
187 197 if (lddatap != NULL)
188 198 *lddatap = NULL;
189 199
200 + if (minaddrp != NULL)
201 + *minaddrp = NULL;
202 +
190 203 if (error = execpermissions(vp, &vat, args)) {
191 204 uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
192 205 return (error);
193 206 }
194 207
195 208 if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
196 209 &nphdrs)) != 0 ||
197 210 (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
198 211 &phdrsize)) != 0) {
199 212 uprintf("%s: Cannot read %s\n", exec_file, args->pathname);
200 213 return (error);
201 214 }
202 215
203 216 if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
204 217 uprintf("%s: Nothing to load in %s", exec_file, args->pathname);
|
↓ open down ↓ |
5 lines elided |
↑ open up ↑ |
205 218 kmem_free(phdrbase, phdrsize);
206 219 return (ENOEXEC);
207 220 }
208 221 if (lddatap != NULL)
209 222 *lddatap = lddata;
210 223
211 224 if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
212 225 &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
213 226 len, &execsz, brksize)) {
214 227 uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
228 + if (uphdr != NULL && uphdr->p_flags == 0)
229 + kmem_free(uphdr, sizeof (Phdr));
215 230 kmem_free(phdrbase, phdrsize);
216 231 return (error);
217 232 }
218 233
234 + if (minaddrp != NULL)
235 + *minaddrp = minaddr;
236 +
219 237 /*
220 - * Inform our caller if the executable needs an interpreter.
238 + * If the executable requires an interpreter, determine its name.
221 239 */
222 - *interp = (dynphdr == NULL) ? 0 : 1;
240 + if (dynphdr != NULL) {
241 + ssize_t resid;
223 242
243 + if (dynphdr->p_filesz > MAXPATHLEN || dynphdr->p_filesz == 0) {
244 + uprintf("%s: Invalid interpreter\n", exec_file);
245 + kmem_free(phdrbase, phdrsize);
246 + return (ENOEXEC);
247 + }
248 +
249 + interp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
250 +
251 + if ((error = vn_rdwr(UIO_READ, vp, interp, dynphdr->p_filesz,
252 + (offset_t)dynphdr->p_offset, UIO_SYSSPACE, 0,
253 + (rlim64_t)0, CRED(), &resid)) != 0 || resid != 0 ||
254 + interp[dynphdr->p_filesz - 1] != '\0') {
255 + uprintf("%s: Cannot obtain interpreter pathname\n",
256 + exec_file);
257 + kmem_free(interp, MAXPATHLEN);
258 + kmem_free(phdrbase, phdrsize);
259 + return (error != 0 ? error : ENOEXEC);
260 + }
261 + }
262 +
224 263 /*
225 264 * If this is a statically linked executable, voffset should indicate
226 265 * the address of the executable itself (it normally holds the address
227 266 * of the interpreter).
228 267 */
229 - if (ehdr->e_type == ET_EXEC && *interp == 0)
268 + if (ehdr->e_type == ET_EXEC && interp == NULL)
230 269 *voffset = minaddr;
231 270
271 + /*
272 + * If the caller has asked for the interpreter name, return it (it's
273 + * up to the caller to free it); if the caller hasn't asked for it,
274 + * free it ourselves.
275 + */
276 + if (interpp != NULL) {
277 + *interpp = interp;
278 + } else if (interp != NULL) {
279 + kmem_free(interp, MAXPATHLEN);
280 + }
281 +
232 282 if (uphdr != NULL) {
233 283 *uphdr_vaddr = uphdr->p_vaddr;
284 +
285 + if (uphdr->p_flags == 0)
286 + kmem_free(uphdr, sizeof (Phdr));
287 + } else if (ehdr->e_type == ET_DYN) {
288 + /*
289 + * If we don't have a uphdr, we'll apply the logic found
290 + * in mapelfexec() and use the p_vaddr of the first PT_LOAD
291 + * section as the base address of the object.
292 + */
293 + Phdr *phdr = (Phdr *)phdrbase;
294 + int i, hsize = ehdr->e_phentsize;
295 +
296 + for (i = nphdrs; i > 0; i--) {
297 + if (phdr->p_type == PT_LOAD) {
298 + *uphdr_vaddr = (uintptr_t)phdr->p_vaddr +
299 + ehdr->e_phoff;
300 + break;
301 + }
302 +
303 + phdr = (Phdr *)((caddr_t)phdr + hsize);
304 + }
305 +
306 + /*
307 + * If we don't have a PT_LOAD segment, we should have returned
308 + * ENOEXEC when elfsize() returned 0, above.
309 + */
310 + VERIFY(i > 0);
234 311 } else {
235 312 *uphdr_vaddr = (Addr)-1;
236 313 }
237 314
238 315 kmem_free(phdrbase, phdrsize);
239 316 return (error);
240 317 }
241 318
242 319 /*ARGSUSED*/
243 320 int
244 321 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
245 322 int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
246 - int brand_action)
323 + int *brand_action)
247 324 {
248 325 caddr_t phdrbase = NULL;
249 326 caddr_t bssbase = 0;
250 327 caddr_t brkbase = 0;
251 328 size_t brksize = 0;
252 - ssize_t dlnsize;
329 + ssize_t dlnsize, nsize = 0;
253 330 aux_entry_t *aux;
254 331 int error;
255 332 ssize_t resid;
256 333 int fd = -1;
257 334 intptr_t voffset;
258 335 Phdr *dyphdr = NULL;
259 336 Phdr *stphdr = NULL;
260 337 Phdr *uphdr = NULL;
261 338 Phdr *junk = NULL;
262 339 size_t len;
263 340 ssize_t phdrsize;
264 341 int postfixsize = 0;
265 342 int i, hsize;
|
↓ open down ↓ |
3 lines elided |
↑ open up ↑ |
266 343 Phdr *phdrp;
267 344 Phdr *dataphdrp = NULL;
268 345 Phdr *dtrphdr;
269 346 Phdr *capphdr = NULL;
270 347 Cap *cap = NULL;
271 348 ssize_t capsize;
272 349 int hasu = 0;
273 350 int hasauxv = 0;
274 351 int hasdy = 0;
275 352 int branded = 0;
353 + int dynuphdr = 0;
276 354
277 355 struct proc *p = ttoproc(curthread);
278 356 struct user *up = PTOU(p);
279 357 struct bigwad {
280 358 Ehdr ehdr;
281 359 aux_entry_t elfargs[__KERN_NAUXV_IMPL];
282 360 char dl_name[MAXPATHLEN];
283 361 char pathbuf[MAXPATHLEN];
284 362 struct vattr vattr;
285 363 struct execenv exenv;
286 364 } *bigwad; /* kmem_alloc this behemoth so we don't blow stack */
287 365 Ehdr *ehdrp;
288 366 int nshdrs, shstrndx, nphdrs;
289 367 char *dlnp;
290 368 char *pathbufp;
291 369 rlim64_t limit;
292 370 rlim64_t roundlimit;
293 371
294 372 ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
295 373
296 374 bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
297 375 ehdrp = &bigwad->ehdr;
298 376 dlnp = bigwad->dl_name;
299 377 pathbufp = bigwad->pathbuf;
300 378
301 379 /*
302 380 * Obtain ELF and program header information.
303 381 */
304 382 if ((error = getelfhead(vp, CRED(), ehdrp, &nshdrs, &shstrndx,
305 383 &nphdrs)) != 0 ||
306 384 (error = getelfphdr(vp, CRED(), ehdrp, nphdrs, &phdrbase,
307 385 &phdrsize)) != 0)
308 386 goto out;
309 387
310 388 /*
311 389 * Prevent executing an ELF file that has no entry point.
312 390 */
313 391 if (ehdrp->e_entry == 0) {
314 392 uprintf("%s: Bad entry point\n", exec_file);
315 393 goto bad;
316 394 }
317 395
318 396 /*
319 397 * Put data model that we're exec-ing to into the args passed to
|
↓ open down ↓ |
34 lines elided |
↑ open up ↑ |
320 398 * exec_args(), so it will know what it is copying to on new stack.
321 399 * Now that we know whether we are exec-ing a 32-bit or 64-bit
322 400 * executable, we can set execsz with the appropriate NCARGS.
323 401 */
324 402 #ifdef _LP64
325 403 if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) {
326 404 args->to_model = DATAMODEL_ILP32;
327 405 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
328 406 } else {
329 407 args->to_model = DATAMODEL_LP64;
330 - args->stk_prot &= ~PROT_EXEC;
408 + if (!args->stk_prot_override) {
409 + args->stk_prot &= ~PROT_EXEC;
410 + }
331 411 #if defined(__i386) || defined(__amd64)
332 412 args->dat_prot &= ~PROT_EXEC;
333 413 #endif
334 414 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1);
335 415 }
336 416 #else /* _LP64 */
337 417 args->to_model = DATAMODEL_ILP32;
338 418 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1);
339 419 #endif /* _LP64 */
340 420
341 421 /*
342 - * We delay invoking the brand callback until we've figured out
343 - * what kind of elf binary we're trying to run, 32-bit or 64-bit.
344 - * We do this because now the brand library can just check
345 - * args->to_model to see if the target is 32-bit or 64-bit without
346 - * having do duplicate all the code above.
422 + * We delay invoking the brand callback until we've figured out what
423 + * kind of elf binary we're trying to run, 32-bit or 64-bit. We do this
424 + * because now the brand library can just check args->to_model to see if
425 + * the target is 32-bit or 64-bit without having do duplicate all the
426 + * code above.
347 427 *
428 + * We also give the brand a chance to indicate that based on the ELF
429 + * OSABI of the target binary it should become unbranded and optionally
430 + * indicate that it should be treated as existing in a specific prefix.
431 + *
432 + * Note that if a brand opts to go down this route it does not actually
433 + * end up being debranded. In other words, future programs that exec
434 + * will still be considered for branding unless this escape hatch is
435 + * used. Consider the case of lx brand for example. If a user runs
436 + * /native/usr/sbin/dtrace -c /bin/ls, the isaexec and normal executable
437 + * of DTrace that's in /native will take this escape hatch and be run
438 + * and interpreted using the normal system call table; however, the
439 + * execution of a non-illumos binary in the form of /bin/ls will still
440 + * be branded and be subject to all of the normal actions of the brand.
441 + *
348 442 * The level checks associated with brand handling below are used to
349 443 * prevent a loop since the brand elfexec function typically comes back
350 444 * through this function. We must check <= here since the nested
351 445 * handling in the #! interpreter code will increment the level before
352 446 * calling gexec to run the final elfexec interpreter.
353 447 */
448 + if ((level <= INTP_MAXDEPTH) && (*brand_action != EBA_NATIVE) &&
449 + (PROC_IS_BRANDED(p)) && (BROP(p)->b_native_exec != NULL)) {
450 + if (BROP(p)->b_native_exec(ehdrp->e_ident[EI_OSABI],
451 + &args->brand_nroot) == B_TRUE) {
452 + ASSERT(ehdrp->e_ident[EI_OSABI]);
453 + *brand_action = EBA_NATIVE;
454 + /* Add one for the trailing '/' in the path */
455 + if (args->brand_nroot != NULL)
456 + nsize = strlen(args->brand_nroot) + 1;
457 + }
458 + }
459 +
354 460 if ((level <= INTP_MAXDEPTH) &&
355 - (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
461 + (*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
356 462 error = BROP(p)->b_elfexec(vp, uap, args,
357 463 idatap, level + 1, execsz, setid, exec_file, cred,
358 464 brand_action);
359 465 goto out;
360 466 }
361 467
362 468 /*
363 469 * Determine aux size now so that stack can be built
364 470 * in one shot (except actual copyout of aux image),
365 471 * determine any non-default stack protections,
366 472 * and still have this code be machine independent.
367 473 */
368 474 hsize = ehdrp->e_phentsize;
369 475 phdrp = (Phdr *)phdrbase;
370 476 for (i = nphdrs; i > 0; i--) {
371 477 switch (phdrp->p_type) {
372 478 case PT_INTERP:
373 479 hasauxv = hasdy = 1;
374 480 break;
375 481 case PT_PHDR:
376 482 hasu = 1;
377 483 break;
378 484 case PT_SUNWSTACK:
379 485 args->stk_prot = PROT_USER;
380 486 if (phdrp->p_flags & PF_R)
381 487 args->stk_prot |= PROT_READ;
382 488 if (phdrp->p_flags & PF_W)
383 489 args->stk_prot |= PROT_WRITE;
384 490 if (phdrp->p_flags & PF_X)
385 491 args->stk_prot |= PROT_EXEC;
386 492 break;
387 493 case PT_LOAD:
388 494 dataphdrp = phdrp;
389 495 break;
390 496 case PT_SUNWCAP:
391 497 capphdr = phdrp;
392 498 break;
393 499 }
394 500 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
395 501 }
396 502
397 503 if (ehdrp->e_type != ET_EXEC) {
398 504 dataphdrp = NULL;
399 505 hasauxv = 1;
400 506 }
401 507
402 508 /* Copy BSS permissions to args->dat_prot */
403 509 if (dataphdrp != NULL) {
404 510 args->dat_prot = PROT_USER;
405 511 if (dataphdrp->p_flags & PF_R)
406 512 args->dat_prot |= PROT_READ;
407 513 if (dataphdrp->p_flags & PF_W)
408 514 args->dat_prot |= PROT_WRITE;
409 515 if (dataphdrp->p_flags & PF_X)
410 516 args->dat_prot |= PROT_EXEC;
411 517 }
412 518
413 519 /*
414 520 * If a auxvector will be required - reserve the space for
415 521 * it now. This may be increased by exec_args if there are
|
↓ open down ↓ |
50 lines elided |
↑ open up ↑ |
416 522 * ISA-specific types (included in __KERN_NAUXV_IMPL).
417 523 */
418 524 if (hasauxv) {
419 525 /*
420 526 * If a AUX vector is being built - the base AUX
421 527 * entries are:
422 528 *
423 529 * AT_BASE
424 530 * AT_FLAGS
425 531 * AT_PAGESZ
532 + * AT_RANDOM
426 533 * AT_SUN_AUXFLAGS
427 534 * AT_SUN_HWCAP
428 535 * AT_SUN_HWCAP2
429 536 * AT_SUN_PLATFORM (added in stk_copyout)
430 537 * AT_SUN_EXECNAME (added in stk_copyout)
431 538 * AT_NULL
432 539 *
433 - * total == 9
540 + * total == 10
434 541 */
435 542 if (hasdy && hasu) {
436 543 /*
437 544 * Has PT_INTERP & PT_PHDR - the auxvectors that
438 545 * will be built are:
439 546 *
440 547 * AT_PHDR
441 548 * AT_PHENT
442 549 * AT_PHNUM
443 550 * AT_ENTRY
444 551 * AT_LDDATA
445 552 *
446 553 * total = 5
447 554 */
448 - args->auxsize = (9 + 5) * sizeof (aux_entry_t);
555 + args->auxsize = (10 + 5) * sizeof (aux_entry_t);
449 556 } else if (hasdy) {
450 557 /*
451 558 * Has PT_INTERP but no PT_PHDR
452 559 *
453 560 * AT_EXECFD
454 561 * AT_LDDATA
455 562 *
456 563 * total = 2
457 564 */
458 - args->auxsize = (9 + 2) * sizeof (aux_entry_t);
565 + args->auxsize = (10 + 2) * sizeof (aux_entry_t);
459 566 } else {
460 - args->auxsize = 9 * sizeof (aux_entry_t);
567 + args->auxsize = 10 * sizeof (aux_entry_t);
461 568 }
462 569 } else {
463 570 args->auxsize = 0;
464 571 }
465 572
466 573 /*
467 574 * If this binary is using an emulator, we need to add an
468 575 * AT_SUN_EMULATOR aux entry.
469 576 */
470 577 if (args->emulator != NULL)
471 578 args->auxsize += sizeof (aux_entry_t);
472 579
473 - if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
580 + /*
581 + * If this is a native binary that's been given a modified interpreter
582 + * root, inform it that the native system exists at that root.
583 + */
584 + if (args->brand_nroot != NULL) {
585 + args->auxsize += sizeof (aux_entry_t);
586 + }
587 +
588 +
589 + /*
590 + * On supported kernels (x86_64) make room in the auxv for the
591 + * AT_SUN_COMMPAGE entry. This will go unpopulated on i86xpv systems
592 + * which do not provide such functionality.
593 + */
594 +#if defined(__amd64)
595 + args->auxsize += sizeof (aux_entry_t);
596 +#endif /* defined(__amd64) */
597 +
598 + /*
599 + * If we have user credentials, we'll supply the following entries:
600 + * AT_SUN_UID
601 + * AT_SUN_RUID
602 + * AT_SUN_GID
603 + * AT_SUN_RGID
604 + */
605 + if (cred != NULL) {
606 + args->auxsize += 4 * sizeof (aux_entry_t);
607 + }
608 +
609 + if ((*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
474 610 branded = 1;
475 611 /*
476 - * We will be adding 4 entries to the aux vectors. One for
477 - * the the brandname and 3 for the brand specific aux vectors.
612 + * We will be adding 5 entries to the aux vectors. One for
613 + * the the brandname and 4 for the brand specific aux vectors.
478 614 */
479 - args->auxsize += 4 * sizeof (aux_entry_t);
615 + args->auxsize += 5 * sizeof (aux_entry_t);
480 616 }
481 617
482 618 /* Hardware/Software capabilities */
483 619 if (capphdr != NULL &&
484 620 (capsize = capphdr->p_filesz) > 0 &&
485 621 capsize <= 16 * sizeof (*cap)) {
486 622 int ncaps = capsize / sizeof (*cap);
487 623 Cap *cp;
488 624
489 625 cap = kmem_alloc(capsize, KM_SLEEP);
490 626 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
491 627 capsize, (offset_t)capphdr->p_offset,
492 628 UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
493 629 uprintf("%s: Cannot read capabilities section\n",
494 630 exec_file);
495 631 goto out;
496 632 }
497 633 for (cp = cap; cp < cap + ncaps; cp++) {
498 634 if (cp->c_tag == CA_SUNW_SF_1 &&
499 635 (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
500 636 if (args->to_model == DATAMODEL_LP64)
501 637 args->addr32 = 1;
502 638 break;
503 639 }
504 640 }
505 641 }
506 642
507 643 aux = bigwad->elfargs;
508 644 /*
509 645 * Move args to the user's stack.
510 646 * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
511 647 */
512 648 if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
513 649 if (error == -1) {
514 650 error = ENOEXEC;
515 651 goto bad;
516 652 }
517 653 goto out;
518 654 }
519 655 /* we're single threaded after this point */
520 656
521 657 /*
522 658 * If this is an ET_DYN executable (shared object),
523 659 * determine its memory size so that mapelfexec() can load it.
524 660 */
525 661 if (ehdrp->e_type == ET_DYN)
526 662 len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
|
↓ open down ↓ |
37 lines elided |
↑ open up ↑ |
527 663 else
528 664 len = 0;
529 665
530 666 dtrphdr = NULL;
531 667
532 668 if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr,
533 669 &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
534 670 len, execsz, &brksize)) != 0)
535 671 goto bad;
536 672
673 + if (uphdr != NULL) {
674 + /*
675 + * Our uphdr has been dynamically allocated if (and only if)
676 + * its program header flags are clear.
677 + */
678 + dynuphdr = (uphdr->p_flags == 0);
679 + }
680 +
537 681 if (uphdr != NULL && dyphdr == NULL)
538 682 goto bad;
539 683
540 684 if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
541 685 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
542 686 goto bad;
543 687 }
544 688
545 689 if (dyphdr != NULL) {
546 690 size_t len;
547 691 uintptr_t lddata;
548 692 char *p;
549 693 struct vnode *nvp;
550 694
551 - dlnsize = dyphdr->p_filesz;
695 + dlnsize = dyphdr->p_filesz + nsize;
552 696
553 697 if (dlnsize > MAXPATHLEN || dlnsize <= 0)
554 698 goto bad;
555 699
700 + if (nsize != 0) {
701 + bcopy(args->brand_nroot, dlnp, nsize - 1);
702 + dlnp[nsize - 1] = '/';
703 + }
704 +
556 705 /*
557 706 * Read in "interpreter" pathname.
558 707 */
559 - if ((error = vn_rdwr(UIO_READ, vp, dlnp, dyphdr->p_filesz,
560 - (offset_t)dyphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
561 - CRED(), &resid)) != 0) {
708 + if ((error = vn_rdwr(UIO_READ, vp, dlnp + nsize,
709 + dyphdr->p_filesz, (offset_t)dyphdr->p_offset, UIO_SYSSPACE,
710 + 0, (rlim64_t)0, CRED(), &resid)) != 0) {
562 711 uprintf("%s: Cannot obtain interpreter pathname\n",
563 712 exec_file);
564 713 goto bad;
565 714 }
566 715
567 716 if (resid != 0 || dlnp[dlnsize - 1] != '\0')
568 717 goto bad;
569 718
570 719 /*
571 720 * Search for '$ORIGIN' token in interpreter path.
572 721 * If found, expand it.
573 722 */
574 723 for (p = dlnp; p = strchr(p, '$'); ) {
575 724 uint_t len, curlen;
576 725 char *_ptr;
577 726
578 727 if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
579 728 continue;
580 729
581 730 /*
582 731 * We don't support $ORIGIN on setid programs to close
583 732 * a potential attack vector.
584 733 */
585 734 if ((setid & EXECSETID_SETID) != 0) {
586 735 error = ENOEXEC;
587 736 goto bad;
588 737 }
589 738
590 739 curlen = 0;
591 740 len = p - dlnp - 1;
592 741 if (len) {
593 742 bcopy(dlnp, pathbufp, len);
594 743 curlen += len;
595 744 }
596 745 if (_ptr = strrchr(args->pathname, '/')) {
597 746 len = _ptr - args->pathname;
598 747 if ((curlen + len) > MAXPATHLEN)
599 748 break;
600 749
601 750 bcopy(args->pathname, &pathbufp[curlen], len);
602 751 curlen += len;
603 752 } else {
604 753 /*
605 754 * executable is a basename found in the
606 755 * current directory. So - just substitue
607 756 * '.' for ORIGIN.
608 757 */
609 758 pathbufp[curlen] = '.';
610 759 curlen++;
611 760 }
612 761 p += ORIGIN_STR_SIZE;
613 762 len = strlen(p);
614 763
615 764 if ((curlen + len) > MAXPATHLEN)
616 765 break;
617 766 bcopy(p, &pathbufp[curlen], len);
618 767 curlen += len;
619 768 pathbufp[curlen++] = '\0';
620 769 bcopy(pathbufp, dlnp, curlen);
621 770 }
622 771
623 772 /*
624 773 * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
625 774 * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
626 775 * Just in case /usr is not mounted, change it now.
627 776 */
628 777 if (strcmp(dlnp, USR_LIB_RTLD) == 0)
629 778 dlnp += 4;
630 779 error = lookupname(dlnp, UIO_SYSSPACE, FOLLOW, NULLVPP, &nvp);
631 780 if (error && dlnp != bigwad->dl_name) {
632 781 /* new kernel, old user-level */
633 782 error = lookupname(dlnp -= 4, UIO_SYSSPACE, FOLLOW,
634 783 NULLVPP, &nvp);
635 784 }
636 785 if (error) {
637 786 uprintf("%s: Cannot find %s\n", exec_file, dlnp);
638 787 goto bad;
639 788 }
640 789
641 790 /*
642 791 * Setup the "aux" vector.
643 792 */
644 793 if (uphdr) {
645 794 if (ehdrp->e_type == ET_DYN) {
646 795 /* don't use the first page */
647 796 bigwad->exenv.ex_brkbase = (caddr_t)PAGESIZE;
648 797 bigwad->exenv.ex_bssbase = (caddr_t)PAGESIZE;
649 798 } else {
650 799 bigwad->exenv.ex_bssbase = bssbase;
651 800 bigwad->exenv.ex_brkbase = brkbase;
652 801 }
653 802 bigwad->exenv.ex_brksize = brksize;
654 803 bigwad->exenv.ex_magic = elfmagic;
655 804 bigwad->exenv.ex_vp = vp;
656 805 setexecenv(&bigwad->exenv);
657 806
658 807 ADDAUX(aux, AT_PHDR, uphdr->p_vaddr + voffset)
659 808 ADDAUX(aux, AT_PHENT, ehdrp->e_phentsize)
660 809 ADDAUX(aux, AT_PHNUM, nphdrs)
661 810 ADDAUX(aux, AT_ENTRY, ehdrp->e_entry + voffset)
662 811 } else {
663 812 if ((error = execopen(&vp, &fd)) != 0) {
664 813 VN_RELE(nvp);
665 814 goto bad;
666 815 }
667 816
668 817 ADDAUX(aux, AT_EXECFD, fd)
669 818 }
670 819
671 820 if ((error = execpermissions(nvp, &bigwad->vattr, args)) != 0) {
672 821 VN_RELE(nvp);
673 822 uprintf("%s: Cannot execute %s\n", exec_file, dlnp);
674 823 goto bad;
675 824 }
676 825
677 826 /*
678 827 * Now obtain the ELF header along with the entire program
679 828 * header contained in "nvp".
680 829 */
681 830 kmem_free(phdrbase, phdrsize);
682 831 phdrbase = NULL;
683 832 if ((error = getelfhead(nvp, CRED(), ehdrp, &nshdrs,
684 833 &shstrndx, &nphdrs)) != 0 ||
685 834 (error = getelfphdr(nvp, CRED(), ehdrp, nphdrs, &phdrbase,
686 835 &phdrsize)) != 0) {
687 836 VN_RELE(nvp);
688 837 uprintf("%s: Cannot read %s\n", exec_file, dlnp);
689 838 goto bad;
690 839 }
691 840
692 841 /*
693 842 * Determine memory size of the "interpreter's" loadable
694 843 * sections. This size is then used to obtain the virtual
695 844 * address of a hole, in the user's address space, large
|
↓ open down ↓ |
124 lines elided |
↑ open up ↑ |
696 845 * enough to map the "interpreter".
697 846 */
698 847 if ((len = elfsize(ehdrp, nphdrs, phdrbase, &lddata)) == 0) {
699 848 VN_RELE(nvp);
700 849 uprintf("%s: Nothing to load in %s\n", exec_file, dlnp);
701 850 goto bad;
702 851 }
703 852
704 853 dtrphdr = NULL;
705 854
706 - error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk,
855 + error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, NULL, &junk,
707 856 &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
708 857 execsz, NULL);
858 +
709 859 if (error || junk != NULL) {
710 860 VN_RELE(nvp);
711 861 uprintf("%s: Cannot map %s\n", exec_file, dlnp);
712 862 goto bad;
713 863 }
714 864
715 865 /*
716 866 * We use the DTrace program header to initialize the
717 867 * architecture-specific user per-LWP location. The dtrace
718 868 * fasttrap provider requires ready access to per-LWP scratch
719 869 * space. We assume that there is only one such program header
720 870 * in the interpreter.
721 871 */
722 872 if (dtrphdr != NULL &&
723 873 dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
724 874 VN_RELE(nvp);
|
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
725 875 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, dlnp);
726 876 goto bad;
727 877 }
728 878
729 879 VN_RELE(nvp);
730 880 ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata)
731 881 }
732 882
733 883 if (hasauxv) {
734 884 int auxf = AF_SUN_HWCAPVERIFY;
885 +
735 886 /*
736 - * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
887 + * Note: AT_SUN_PLATFORM and AT_RANDOM were filled in via
737 888 * exec_args()
738 889 */
739 890 ADDAUX(aux, AT_BASE, voffset)
740 891 ADDAUX(aux, AT_FLAGS, at_flags)
741 892 ADDAUX(aux, AT_PAGESZ, PAGESIZE)
742 893 /*
743 894 * Linker flags. (security)
744 895 * p_flag not yet set at this time.
745 896 * We rely on gexec() to provide us with the information.
746 897 * If the application is set-uid but this is not reflected
747 898 * in a mismatch between real/effective uids/gids, then
748 899 * don't treat this as a set-uid exec. So we care about
749 900 * the EXECSETID_UGIDS flag but not the ...SETID flag.
750 901 */
751 902 if ((setid &= ~EXECSETID_SETID) != 0)
752 903 auxf |= AF_SUN_SETUGID;
753 904
754 905 /*
|
↓ open down ↓ |
8 lines elided |
↑ open up ↑ |
755 906 * If we're running a native process from within a branded
756 907 * zone under pfexec then we clear the AF_SUN_SETUGID flag so
757 908 * that the native ld.so.1 is able to link with the native
758 909 * libraries instead of using the brand libraries that are
759 910 * installed in the zone. We only do this for processes
760 911 * which we trust because we see they are already running
761 912 * under pfexec (where uid != euid). This prevents a
762 913 * malicious user within the zone from crafting a wrapper to
763 914 * run native suid commands with unsecure libraries interposed.
764 915 */
765 - if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
916 + if ((*brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
766 917 (setid &= ~EXECSETID_SETID) != 0))
767 918 auxf &= ~AF_SUN_SETUGID;
768 919
769 920 /*
770 921 * Record the user addr of the auxflags aux vector entry
771 922 * since brands may optionally want to manipulate this field.
772 923 */
773 924 args->auxp_auxflags =
774 925 (char *)((char *)args->stackend +
775 926 ((char *)&aux->a_type -
776 927 (char *)bigwad->elfargs));
777 928 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
929 +
778 930 /*
931 + * Record information about the real and effective user and
932 + * group IDs.
933 + */
934 + if (cred != NULL) {
935 + ADDAUX(aux, AT_SUN_UID, crgetuid(cred));
936 + ADDAUX(aux, AT_SUN_RUID, crgetruid(cred));
937 + ADDAUX(aux, AT_SUN_GID, crgetgid(cred));
938 + ADDAUX(aux, AT_SUN_RGID, crgetrgid(cred));
939 + }
940 +
941 + /*
779 942 * Hardware capability flag word (performance hints)
780 943 * Used for choosing faster library routines.
781 944 * (Potentially different between 32-bit and 64-bit ABIs)
782 945 */
783 946 #if defined(_LP64)
784 947 if (args->to_model == DATAMODEL_NATIVE) {
785 948 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
786 949 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
787 950 } else {
788 951 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
789 952 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
790 953 }
791 954 #else
792 955 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
793 956 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
794 957 #endif
795 958 if (branded) {
796 959 /*
|
↓ open down ↓ |
8 lines elided |
↑ open up ↑ |
797 960 * Reserve space for the brand-private aux vectors,
798 961 * and record the user addr of that space.
799 962 */
800 963 args->auxp_brand =
801 964 (char *)((char *)args->stackend +
802 965 ((char *)&aux->a_type -
803 966 (char *)bigwad->elfargs));
804 967 ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
805 968 ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
806 969 ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
970 + ADDAUX(aux, AT_SUN_BRAND_AUX4, 0)
807 971 }
808 972
973 + /*
974 + * Add the comm page auxv entry, mapping it in if needed.
975 + */
976 +#if defined(__amd64)
977 + if (args->commpage != NULL ||
978 + (args->commpage = (uintptr_t)comm_page_mapin()) != NULL) {
979 + ADDAUX(aux, AT_SUN_COMMPAGE, args->commpage)
980 + } else {
981 + /*
982 + * If the comm page cannot be mapped, pad out the auxv
983 + * to satisfy later size checks.
984 + */
985 + ADDAUX(aux, AT_NULL, 0)
986 + }
987 +#endif /* defined(__amd64) */
988 +
809 989 ADDAUX(aux, AT_NULL, 0)
810 990 postfixsize = (char *)aux - (char *)bigwad->elfargs;
811 991
812 992 /*
813 993 * We make assumptions above when we determine how many aux
814 994 * vector entries we will be adding. However, if we have an
815 995 * invalid elf file, it is possible that mapelfexec might
816 996 * behave differently (but not return an error), in which case
817 997 * the number of aux entries we actually add will be different.
818 998 * We detect that now and error out.
819 999 */
820 1000 if (postfixsize != args->auxsize) {
821 1001 DTRACE_PROBE2(elfexec_badaux, int, postfixsize,
822 1002 int, args->auxsize);
823 1003 goto bad;
824 1004 }
825 1005 ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
826 1006 }
827 1007
828 1008 /*
829 1009 * For the 64-bit kernel, the limit is big enough that rounding it up
830 1010 * to a page can overflow the 64-bit limit, so we check for btopr()
831 1011 * overflowing here by comparing it with the unrounded limit in pages.
832 1012 * If it hasn't overflowed, compare the exec size with the rounded up
833 1013 * limit in pages. Otherwise, just compare with the unrounded limit.
834 1014 */
835 1015 limit = btop(p->p_vmem_ctl);
836 1016 roundlimit = btopr(p->p_vmem_ctl);
837 1017 if ((roundlimit > limit && *execsz > roundlimit) ||
|
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
838 1018 (roundlimit < limit && *execsz > limit)) {
839 1019 mutex_enter(&p->p_lock);
840 1020 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
841 1021 RCA_SAFE);
842 1022 mutex_exit(&p->p_lock);
843 1023 error = ENOMEM;
844 1024 goto bad;
845 1025 }
846 1026
847 1027 bzero(up->u_auxv, sizeof (up->u_auxv));
1028 + up->u_commpagep = args->commpage;
848 1029 if (postfixsize) {
849 1030 int num_auxv;
850 1031
851 1032 /*
852 1033 * Copy the aux vector to the user stack.
853 1034 */
854 1035 error = execpoststack(args, bigwad->elfargs, postfixsize);
855 1036 if (error)
856 1037 goto bad;
857 1038
858 1039 /*
859 1040 * Copy auxv to the process's user structure for use by /proc.
860 1041 * If this is a branded process, the brand's exec routine will
861 1042 * copy it's private entries to the user structure later. It
862 1043 * relies on the fact that the blank entries are at the end.
863 1044 */
864 1045 num_auxv = postfixsize / sizeof (aux_entry_t);
865 1046 ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
866 1047 aux = bigwad->elfargs;
867 1048 for (i = 0; i < num_auxv; i++) {
868 1049 up->u_auxv[i].a_type = aux[i].a_type;
869 1050 up->u_auxv[i].a_un.a_val = (aux_val_t)aux[i].a_un.a_val;
870 1051 }
871 1052 }
872 1053
873 1054 /*
874 1055 * Pass back the starting address so we can set the program counter.
875 1056 */
876 1057 args->entry = (uintptr_t)(ehdrp->e_entry + voffset);
877 1058
878 1059 if (!uphdr) {
879 1060 if (ehdrp->e_type == ET_DYN) {
880 1061 /*
881 1062 * If we are executing a shared library which doesn't
882 1063 * have a interpreter (probably ld.so.1) then
883 1064 * we don't set the brkbase now. Instead we
884 1065 * delay it's setting until the first call
885 1066 * via grow.c::brk(). This permits ld.so.1 to
886 1067 * initialize brkbase to the tail of the executable it
887 1068 * loads (which is where it needs to be).
888 1069 */
889 1070 bigwad->exenv.ex_brkbase = (caddr_t)0;
890 1071 bigwad->exenv.ex_bssbase = (caddr_t)0;
891 1072 bigwad->exenv.ex_brksize = 0;
892 1073 } else {
893 1074 bigwad->exenv.ex_brkbase = brkbase;
894 1075 bigwad->exenv.ex_bssbase = bssbase;
895 1076 bigwad->exenv.ex_brksize = brksize;
896 1077 }
897 1078 bigwad->exenv.ex_magic = elfmagic;
898 1079 bigwad->exenv.ex_vp = vp;
899 1080 setexecenv(&bigwad->exenv);
900 1081 }
901 1082
902 1083 ASSERT(error == 0);
903 1084 goto out;
|
↓ open down ↓ |
46 lines elided |
↑ open up ↑ |
904 1085
905 1086 bad:
906 1087 if (fd != -1) /* did we open the a.out yet */
907 1088 (void) execclose(fd);
908 1089
909 1090 psignal(p, SIGKILL);
910 1091
911 1092 if (error == 0)
912 1093 error = ENOEXEC;
913 1094 out:
1095 + if (dynuphdr)
1096 + kmem_free(uphdr, sizeof (Phdr));
914 1097 if (phdrbase != NULL)
915 1098 kmem_free(phdrbase, phdrsize);
916 1099 if (cap != NULL)
917 1100 kmem_free(cap, capsize);
918 1101 kmem_free(bigwad, sizeof (struct bigwad));
919 1102 return (error);
920 1103 }
921 1104
922 1105 /*
923 1106 * Compute the memory size requirement for the ELF file.
924 1107 */
925 1108 static size_t
926 1109 elfsize(Ehdr *ehdrp, int nphdrs, caddr_t phdrbase, uintptr_t *lddata)
927 1110 {
928 1111 size_t len;
929 1112 Phdr *phdrp = (Phdr *)phdrbase;
930 1113 int hsize = ehdrp->e_phentsize;
931 1114 int first = 1;
932 1115 int dfirst = 1; /* first data segment */
933 1116 uintptr_t loaddr = 0;
934 1117 uintptr_t hiaddr = 0;
935 1118 uintptr_t lo, hi;
936 1119 int i;
937 1120
938 1121 for (i = nphdrs; i > 0; i--) {
939 1122 if (phdrp->p_type == PT_LOAD) {
940 1123 lo = phdrp->p_vaddr;
941 1124 hi = lo + phdrp->p_memsz;
942 1125 if (first) {
943 1126 loaddr = lo;
944 1127 hiaddr = hi;
945 1128 first = 0;
946 1129 } else {
947 1130 if (loaddr > lo)
948 1131 loaddr = lo;
949 1132 if (hiaddr < hi)
950 1133 hiaddr = hi;
951 1134 }
952 1135
953 1136 /*
954 1137 * save the address of the first data segment
955 1138 * of a object - used for the AT_SUNW_LDDATA
956 1139 * aux entry.
957 1140 */
958 1141 if ((lddata != NULL) && dfirst &&
959 1142 (phdrp->p_flags & PF_W)) {
960 1143 *lddata = lo;
961 1144 dfirst = 0;
962 1145 }
963 1146 }
964 1147 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
965 1148 }
966 1149
967 1150 len = hiaddr - (loaddr & PAGEMASK);
968 1151 len = roundup(len, PAGESIZE);
969 1152
970 1153 return (len);
971 1154 }
972 1155
973 1156 /*
974 1157 * Read in the ELF header and program header table.
975 1158 * SUSV3 requires:
976 1159 * ENOEXEC File format is not recognized
977 1160 * EINVAL Format recognized but execution not supported
978 1161 */
979 1162 static int
980 1163 getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, int *nshdrs, int *shstrndx,
981 1164 int *nphdrs)
982 1165 {
983 1166 int error;
984 1167 ssize_t resid;
985 1168
986 1169 /*
987 1170 * We got here by the first two bytes in ident,
988 1171 * now read the entire ELF header.
989 1172 */
990 1173 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr,
991 1174 sizeof (Ehdr), (offset_t)0, UIO_SYSSPACE, 0,
992 1175 (rlim64_t)0, credp, &resid)) != 0)
993 1176 return (error);
994 1177
995 1178 /*
996 1179 * Since a separate version is compiled for handling 32-bit and
997 1180 * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
998 1181 * doesn't need to be able to deal with 32-bit ELF files.
999 1182 */
1000 1183 if (resid != 0 ||
1001 1184 ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
1002 1185 ehdr->e_ident[EI_MAG3] != ELFMAG3)
1003 1186 return (ENOEXEC);
1004 1187
1005 1188 if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ||
1006 1189 #if defined(_ILP32) || defined(_ELF32_COMPAT)
1007 1190 ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
1008 1191 #else
1009 1192 ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
1010 1193 #endif
1011 1194 !elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine,
1012 1195 ehdr->e_flags))
1013 1196 return (EINVAL);
1014 1197
1015 1198 *nshdrs = ehdr->e_shnum;
1016 1199 *shstrndx = ehdr->e_shstrndx;
1017 1200 *nphdrs = ehdr->e_phnum;
1018 1201
1019 1202 /*
1020 1203 * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
1021 1204 * to read in the section header at index zero to acces the true
1022 1205 * values for those fields.
1023 1206 */
1024 1207 if ((*nshdrs == 0 && ehdr->e_shoff != 0) ||
1025 1208 *shstrndx == SHN_XINDEX || *nphdrs == PN_XNUM) {
1026 1209 Shdr shdr;
1027 1210
1028 1211 if (ehdr->e_shoff == 0)
1029 1212 return (EINVAL);
1030 1213
1031 1214 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr,
1032 1215 sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0,
1033 1216 (rlim64_t)0, credp, &resid)) != 0)
1034 1217 return (error);
1035 1218
1036 1219 if (*nshdrs == 0)
1037 1220 *nshdrs = shdr.sh_size;
1038 1221 if (*shstrndx == SHN_XINDEX)
1039 1222 *shstrndx = shdr.sh_link;
1040 1223 if (*nphdrs == PN_XNUM && shdr.sh_info != 0)
1041 1224 *nphdrs = shdr.sh_info;
1042 1225 }
1043 1226
1044 1227 return (0);
1045 1228 }
1046 1229
1047 1230 #ifdef _ELF32_COMPAT
1048 1231 extern size_t elf_nphdr_max;
1049 1232 #else
1050 1233 size_t elf_nphdr_max = 1000;
1051 1234 #endif
1052 1235
1053 1236 static int
1054 1237 getelfphdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, int nphdrs,
1055 1238 caddr_t *phbasep, ssize_t *phsizep)
1056 1239 {
1057 1240 ssize_t resid, minsize;
1058 1241 int err;
1059 1242
1060 1243 /*
1061 1244 * Since we're going to be using e_phentsize to iterate down the
1062 1245 * array of program headers, it must be 8-byte aligned or else
1063 1246 * a we might cause a misaligned access. We use all members through
1064 1247 * p_flags on 32-bit ELF files and p_memsz on 64-bit ELF files so
1065 1248 * e_phentsize must be at least large enough to include those
1066 1249 * members.
1067 1250 */
1068 1251 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1069 1252 minsize = offsetof(Phdr, p_flags) + sizeof (((Phdr *)NULL)->p_flags);
1070 1253 #else
1071 1254 minsize = offsetof(Phdr, p_memsz) + sizeof (((Phdr *)NULL)->p_memsz);
1072 1255 #endif
1073 1256 if (ehdr->e_phentsize < minsize || (ehdr->e_phentsize & 3))
1074 1257 return (EINVAL);
1075 1258
1076 1259 *phsizep = nphdrs * ehdr->e_phentsize;
1077 1260
1078 1261 if (*phsizep > sizeof (Phdr) * elf_nphdr_max) {
1079 1262 if ((*phbasep = kmem_alloc(*phsizep, KM_NOSLEEP)) == NULL)
1080 1263 return (ENOMEM);
1081 1264 } else {
1082 1265 *phbasep = kmem_alloc(*phsizep, KM_SLEEP);
1083 1266 }
1084 1267
1085 1268 if ((err = vn_rdwr(UIO_READ, vp, *phbasep, *phsizep,
1086 1269 (offset_t)ehdr->e_phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1087 1270 credp, &resid)) != 0) {
1088 1271 kmem_free(*phbasep, *phsizep);
1089 1272 *phbasep = NULL;
1090 1273 return (err);
1091 1274 }
1092 1275
1093 1276 return (0);
1094 1277 }
1095 1278
1096 1279 #ifdef _ELF32_COMPAT
1097 1280 extern size_t elf_nshdr_max;
1098 1281 extern size_t elf_shstrtab_max;
1099 1282 #else
1100 1283 size_t elf_nshdr_max = 10000;
1101 1284 size_t elf_shstrtab_max = 100 * 1024;
1102 1285 #endif
1103 1286
1104 1287
1105 1288 static int
1106 1289 getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr,
1107 1290 int nshdrs, int shstrndx, caddr_t *shbasep, ssize_t *shsizep,
1108 1291 char **shstrbasep, ssize_t *shstrsizep)
1109 1292 {
1110 1293 ssize_t resid, minsize;
1111 1294 int err;
1112 1295 Shdr *shdr;
1113 1296
1114 1297 /*
1115 1298 * Since we're going to be using e_shentsize to iterate down the
1116 1299 * array of section headers, it must be 8-byte aligned or else
1117 1300 * a we might cause a misaligned access. We use all members through
1118 1301 * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
1119 1302 * must be at least large enough to include that member. The index
1120 1303 * of the string table section must also be valid.
1121 1304 */
1122 1305 minsize = offsetof(Shdr, sh_entsize) + sizeof (shdr->sh_entsize);
1123 1306 if (ehdr->e_shentsize < minsize || (ehdr->e_shentsize & 3) ||
1124 1307 shstrndx >= nshdrs)
1125 1308 return (EINVAL);
1126 1309
1127 1310 *shsizep = nshdrs * ehdr->e_shentsize;
1128 1311
1129 1312 if (*shsizep > sizeof (Shdr) * elf_nshdr_max) {
1130 1313 if ((*shbasep = kmem_alloc(*shsizep, KM_NOSLEEP)) == NULL)
1131 1314 return (ENOMEM);
1132 1315 } else {
1133 1316 *shbasep = kmem_alloc(*shsizep, KM_SLEEP);
1134 1317 }
1135 1318
1136 1319 if ((err = vn_rdwr(UIO_READ, vp, *shbasep, *shsizep,
1137 1320 (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1138 1321 credp, &resid)) != 0) {
1139 1322 kmem_free(*shbasep, *shsizep);
1140 1323 return (err);
1141 1324 }
1142 1325
1143 1326 /*
1144 1327 * Pull the section string table out of the vnode; fail if the size
1145 1328 * is zero.
1146 1329 */
1147 1330 shdr = (Shdr *)(*shbasep + shstrndx * ehdr->e_shentsize);
1148 1331 if ((*shstrsizep = shdr->sh_size) == 0) {
1149 1332 kmem_free(*shbasep, *shsizep);
1150 1333 return (EINVAL);
1151 1334 }
1152 1335
1153 1336 if (*shstrsizep > elf_shstrtab_max) {
1154 1337 if ((*shstrbasep = kmem_alloc(*shstrsizep,
1155 1338 KM_NOSLEEP)) == NULL) {
1156 1339 kmem_free(*shbasep, *shsizep);
1157 1340 return (ENOMEM);
1158 1341 }
1159 1342 } else {
1160 1343 *shstrbasep = kmem_alloc(*shstrsizep, KM_SLEEP);
1161 1344 }
1162 1345
1163 1346 if ((err = vn_rdwr(UIO_READ, vp, *shstrbasep, *shstrsizep,
1164 1347 (offset_t)shdr->sh_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
1165 1348 credp, &resid)) != 0) {
1166 1349 kmem_free(*shbasep, *shsizep);
1167 1350 kmem_free(*shstrbasep, *shstrsizep);
1168 1351 return (err);
1169 1352 }
|
↓ open down ↓ |
246 lines elided |
↑ open up ↑ |
1170 1353
1171 1354 /*
1172 1355 * Make sure the strtab is null-terminated to make sure we
1173 1356 * don't run off the end of the table.
1174 1357 */
1175 1358 (*shstrbasep)[*shstrsizep - 1] = '\0';
1176 1359
1177 1360 return (0);
1178 1361 }
1179 1362
1363 +
1364 +#ifdef _ELF32_COMPAT
1365 +int
1366 +elf32readhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
1367 + caddr_t *phbasep, ssize_t *phsizep)
1368 +#else
1369 +int
1370 +elfreadhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
1371 + caddr_t *phbasep, ssize_t *phsizep)
1372 +#endif
1373 +{
1374 + int error, nshdrs, shstrndx;
1375 +
1376 + if ((error = getelfhead(vp, credp, ehdrp, &nshdrs, &shstrndx,
1377 + nphdrs)) != 0 ||
1378 + (error = getelfphdr(vp, credp, ehdrp, *nphdrs, phbasep,
1379 + phsizep)) != 0) {
1380 + return (error);
1381 + }
1382 + return (0);
1383 +}
1384 +
1385 +
1180 1386 static int
1181 1387 mapelfexec(
1182 1388 vnode_t *vp,
1183 1389 Ehdr *ehdr,
1184 1390 int nphdrs,
1185 1391 caddr_t phdrbase,
1186 1392 Phdr **uphdr,
1187 1393 Phdr **dyphdr,
1188 1394 Phdr **stphdr,
1189 1395 Phdr **dtphdr,
1190 1396 Phdr *dataphdrp,
1191 1397 caddr_t *bssbase,
1192 1398 caddr_t *brkbase,
1193 1399 intptr_t *voffset,
1194 1400 intptr_t *minaddr,
1195 1401 size_t len,
1196 1402 long *execsz,
1197 1403 size_t *brksize)
1198 1404 {
1199 1405 Phdr *phdr;
1200 - int i, prot, error;
1406 + int i, prot, error, lastprot = 0;
1201 1407 caddr_t addr = NULL;
1202 1408 size_t zfodsz;
1203 1409 int ptload = 0;
1204 1410 int page;
1205 1411 off_t offset;
1206 1412 int hsize = ehdr->e_phentsize;
1207 1413 caddr_t mintmp = (caddr_t)-1;
1414 + uintptr_t lastaddr = NULL;
1208 1415 extern int use_brk_lpg;
1209 1416
1210 1417 if (ehdr->e_type == ET_DYN) {
1211 - /*
1212 - * Obtain the virtual address of a hole in the
1213 - * address space to map the "interpreter".
1214 - */
1215 - map_addr(&addr, len, (offset_t)0, 1, 0);
1216 - if (addr == NULL)
1217 - return (ENOMEM);
1218 - *voffset = (intptr_t)addr;
1418 + caddr_t vaddr;
1219 1419
1220 1420 /*
1221 - * Calculate the minimum vaddr so it can be subtracted out.
1222 - * According to the ELF specification, since PT_LOAD sections
1223 - * must be sorted by increasing p_vaddr values, this is
1224 - * guaranteed to be the first PT_LOAD section.
1421 + * Despite the fact that mmapobj(2) refuses to load them, we
1422 + * need to support executing ET_DYN objects that have a
1423 + * non-NULL p_vaddr. When found in the wild, these objects
1424 + * are likely to be due to an old (and largely obviated) Linux
1425 + * facility, prelink(8), that rewrites shared objects to
1426 + * prefer specific (disjoint) virtual address ranges. (Yes,
1427 + * this is putatively for performance -- and yes, it has
1428 + * limited applicability, many edge conditions and grisly
1429 + * failure modes; even for Linux, it's insane.) As ELF
1430 + * mandates that the PT_LOAD segments be in p_vaddr order, we
1431 + * find the lowest p_vaddr by finding the first PT_LOAD
1432 + * segment.
1225 1433 */
1226 1434 phdr = (Phdr *)phdrbase;
1227 1435 for (i = nphdrs; i > 0; i--) {
1228 1436 if (phdr->p_type == PT_LOAD) {
1229 - *voffset -= (uintptr_t)phdr->p_vaddr;
1437 + addr = (caddr_t)(uintptr_t)phdr->p_vaddr;
1230 1438 break;
1231 1439 }
1232 1440 phdr = (Phdr *)((caddr_t)phdr + hsize);
1233 1441 }
1234 1442
1443 + /*
1444 + * We have a non-zero p_vaddr in the first PT_LOAD segment --
1445 + * presumably because we're directly executing a prelink(8)'d
1446 + * ld-linux.so. While we could correctly execute such an
1447 + * object without locating it at its desired p_vaddr (it is,
1448 + * after all, still relocatable), our inner antiquarian
1449 + * derives a perverse pleasure in accommodating the steampunk
1450 + * prelink(8) contraption -- goggles on!
1451 + */
1452 + if ((vaddr = addr) != NULL) {
1453 + if (as_gap(curproc->p_as, len,
1454 + &addr, &len, AH_LO, NULL) == -1 || addr != vaddr) {
1455 + addr = NULL;
1456 + }
1457 + }
1458 +
1459 + if (addr == NULL) {
1460 + /*
1461 + * We either have a NULL p_vaddr (the common case, by
1462 + * many orders of magnitude) or we have a non-NULL
1463 + * p_vaddr and we were unable to obtain the specified
1464 + * VA range (presumably because it's an illegal
1465 + * address). Either way, obtain an address in which
1466 + * to map the interpreter.
1467 + */
1468 + map_addr(&addr, len, (offset_t)0, 1, 0);
1469 + if (addr == NULL)
1470 + return (ENOMEM);
1471 + }
1472 +
1473 + /*
1474 + * Our voffset is the difference between where we landed and
1475 + * where we wanted to be.
1476 + */
1477 + *voffset = (uintptr_t)addr - (uintptr_t)vaddr;
1235 1478 } else {
1236 1479 *voffset = 0;
1237 1480 }
1481 +
1238 1482 phdr = (Phdr *)phdrbase;
1239 1483 for (i = nphdrs; i > 0; i--) {
1240 1484 switch (phdr->p_type) {
1241 1485 case PT_LOAD:
1242 - if ((*dyphdr != NULL) && (*uphdr == NULL))
1243 - return (0);
1244 -
1245 1486 ptload = 1;
1246 1487 prot = PROT_USER;
1247 1488 if (phdr->p_flags & PF_R)
1248 1489 prot |= PROT_READ;
1249 1490 if (phdr->p_flags & PF_W)
1250 1491 prot |= PROT_WRITE;
1251 1492 if (phdr->p_flags & PF_X)
1252 1493 prot |= PROT_EXEC;
1253 1494
1254 1495 addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
1255 1496
1497 + if ((*dyphdr != NULL) && uphdr != NULL &&
1498 + (*uphdr == NULL)) {
1499 + /*
1500 + * The PT_PHDR program header is, strictly
1501 + * speaking, optional. If we find that this
1502 + * is missing, we will determine the location
1503 + * of the program headers based on the address
1504 + * of the lowest PT_LOAD segment (namely, this
1505 + * one): we subtract the p_offset to get to
1506 + * the ELF header and then add back the program
1507 + * header offset to get to the program headers.
1508 + * We then cons up a Phdr that corresponds to
1509 + * the (missing) PT_PHDR, setting the flags
1510 + * to 0 to denote that this is artificial and
1511 + * should (must) be freed by the caller.
1512 + */
1513 + Phdr *cons;
1514 +
1515 + cons = kmem_zalloc(sizeof (Phdr), KM_SLEEP);
1516 +
1517 + cons->p_flags = 0;
1518 + cons->p_type = PT_PHDR;
1519 + cons->p_vaddr = ((uintptr_t)addr -
1520 + phdr->p_offset) + ehdr->e_phoff;
1521 +
1522 + *uphdr = cons;
1523 + }
1524 +
1256 1525 /*
1257 1526 * Keep track of the segment with the lowest starting
1258 1527 * address.
1259 1528 */
1260 1529 if (addr < mintmp)
1261 1530 mintmp = addr;
1262 1531
1532 + /*
1533 + * Segments need not correspond to page boundaries:
1534 + * they are permitted to share a page. If two PT_LOAD
1535 + * segments share the same page, and the permissions
1536 + * of the segments differ, the behavior is historically
1537 + * that the permissions of the latter segment are used
1538 + * for the page that the two segments share. This is
1539 + * also historically a non-issue: binaries generated
1540 + * by most anything will make sure that two PT_LOAD
1541 + * segments with differing permissions don't actually
1542 + * share any pages. However, there exist some crazy
1543 + * things out there (including at least an obscure
1544 + * Portuguese teaching language called G-Portugol) that
1545 + * actually do the wrong thing and expect it to work:
1546 + * they have a segment with execute permission share
1547 + * a page with a subsequent segment that does not
1548 + * have execute permissions and expect the resulting
1549 + * shared page to in fact be executable. To accommodate
1550 + * such broken link editors, we take advantage of a
1551 + * latitude explicitly granted to the loader: it is
1552 + * permitted to make _any_ PT_LOAD segment executable
1553 + * (provided that it is readable or writable). If we
1554 + * see that we're sharing a page and that the previous
1555 + * page was executable, we will add execute permissions
1556 + * to our segment.
1557 + */
1558 + if (btop(lastaddr) == btop((uintptr_t)addr) &&
1559 + (phdr->p_flags & (PF_R | PF_W)) &&
1560 + (lastprot & PROT_EXEC)) {
1561 + prot |= PROT_EXEC;
1562 + }
1563 +
1564 + lastaddr = (uintptr_t)addr + phdr->p_filesz;
1565 + lastprot = prot;
1566 +
1263 1567 zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
1264 1568
1265 1569 offset = phdr->p_offset;
1266 1570 if (((uintptr_t)offset & PAGEOFFSET) ==
1267 1571 ((uintptr_t)addr & PAGEOFFSET) &&
1268 1572 (!(vp->v_flag & VNOMAP))) {
1269 1573 page = 1;
1270 1574 } else {
1271 1575 page = 0;
1272 1576 }
1273 1577
1274 1578 /*
1275 1579 * Set the heap pagesize for OOB when the bss size
1276 1580 * is known and use_brk_lpg is not 0.
1277 1581 */
1278 1582 if (brksize != NULL && use_brk_lpg &&
1279 1583 zfodsz != 0 && phdr == dataphdrp &&
1280 1584 (prot & PROT_WRITE)) {
1281 1585 size_t tlen = P2NPHASE((uintptr_t)addr +
1282 1586 phdr->p_filesz, PAGESIZE);
1283 1587
1284 1588 if (zfodsz > tlen) {
1285 1589 curproc->p_brkpageszc =
1286 1590 page_szc(map_pgsz(MAPPGSZ_HEAP,
1287 1591 curproc, addr + phdr->p_filesz +
1288 1592 tlen, zfodsz - tlen, 0));
1289 1593 }
1290 1594 }
1291 1595
1292 1596 if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
1293 1597 (prot & PROT_WRITE)) {
1294 1598 uint_t szc = curproc->p_brkpageszc;
1295 1599 size_t pgsz = page_get_pagesize(szc);
1296 1600 caddr_t ebss = addr + phdr->p_memsz;
1297 1601 size_t extra_zfodsz;
1298 1602
1299 1603 ASSERT(pgsz > PAGESIZE);
1300 1604
1301 1605 extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
1302 1606
1303 1607 if (error = execmap(vp, addr, phdr->p_filesz,
1304 1608 zfodsz + extra_zfodsz, phdr->p_offset,
1305 1609 prot, page, szc))
1306 1610 goto bad;
1307 1611 if (brksize != NULL)
1308 1612 *brksize = extra_zfodsz;
1309 1613 } else {
1310 1614 if (error = execmap(vp, addr, phdr->p_filesz,
1311 1615 zfodsz, phdr->p_offset, prot, page, 0))
1312 1616 goto bad;
1313 1617 }
1314 1618
1315 1619 if (bssbase != NULL && addr >= *bssbase &&
1316 1620 phdr == dataphdrp) {
|
↓ open down ↓ |
44 lines elided |
↑ open up ↑ |
1317 1621 *bssbase = addr + phdr->p_filesz;
1318 1622 }
1319 1623 if (brkbase != NULL && addr >= *brkbase) {
1320 1624 *brkbase = addr + phdr->p_memsz;
1321 1625 }
1322 1626
1323 1627 *execsz += btopr(phdr->p_memsz);
1324 1628 break;
1325 1629
1326 1630 case PT_INTERP:
1327 - if (ptload)
1328 - goto bad;
1631 + /*
1632 + * The ELF specification is unequivocal about the
1633 + * PT_INTERP program header with respect to any PT_LOAD
1634 + * program header: "If it is present, it must precede
1635 + * any loadable segment entry." Linux, however, makes
1636 + * no attempt to enforce this -- which has allowed some
1637 + * binary editing tools to get away with generating
1638 + * invalid ELF binaries in the respect that PT_INTERP
1639 + * occurs after the first PT_LOAD program header. This
1640 + * is unfortunate (and of course, disappointing) but
1641 + * it's no worse than that: there is no reason that we
1642 + * can't process the PT_INTERP entry (if present) after
1643 + * one or more PT_LOAD entries. We therefore
1644 + * deliberately do not check ptload here and always
1645 + * store dyphdr to be the PT_INTERP program header.
1646 + */
1329 1647 *dyphdr = phdr;
1330 1648 break;
1331 1649
1332 1650 case PT_SHLIB:
1333 1651 *stphdr = phdr;
1334 1652 break;
1335 1653
1336 1654 case PT_PHDR:
1337 - if (ptload)
1655 + if (ptload || phdr->p_flags == 0)
1338 1656 goto bad;
1339 - *uphdr = phdr;
1657 +
1658 + if (uphdr != NULL)
1659 + *uphdr = phdr;
1660 +
1340 1661 break;
1341 1662
1342 1663 case PT_NULL:
1343 1664 case PT_DYNAMIC:
1344 1665 case PT_NOTE:
1345 1666 break;
1346 1667
1347 1668 case PT_SUNWDTRACE:
1348 1669 if (dtphdr != NULL)
1349 1670 *dtphdr = phdr;
1350 1671 break;
1351 1672
1352 1673 default:
1353 1674 break;
1354 1675 }
1355 1676 phdr = (Phdr *)((caddr_t)phdr + hsize);
1356 1677 }
1357 1678
1358 1679 if (minaddr != NULL) {
1359 1680 ASSERT(mintmp != (caddr_t)-1);
1360 1681 *minaddr = (intptr_t)mintmp;
1361 1682 }
1362 1683
1363 1684 return (0);
1364 1685 bad:
1365 1686 if (error == 0)
1366 1687 error = EINVAL;
1367 1688 return (error);
1368 1689 }
1369 1690
1370 1691 int
1371 1692 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1372 1693 rlim64_t rlimit, cred_t *credp)
1373 1694 {
1374 1695 Note note;
1375 1696 int error;
1376 1697
1377 1698 bzero(¬e, sizeof (note));
1378 1699 bcopy("CORE", note.name, 4);
1379 1700 note.nhdr.n_type = type;
1380 1701 /*
1381 1702 * The System V ABI states that n_namesz must be the length of the
1382 1703 * string that follows the Nhdr structure including the terminating
1383 1704 * null. The ABI also specifies that sufficient padding should be
1384 1705 * included so that the description that follows the name string
1385 1706 * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
1386 1707 * respectively. However, since this change was not made correctly
1387 1708 * at the time of the 64-bit port, both 32- and 64-bit binaries
1388 1709 * descriptions are only guaranteed to begin on a 4-byte boundary.
1389 1710 */
1390 1711 note.nhdr.n_namesz = 5;
1391 1712 note.nhdr.n_descsz = roundup(descsz, sizeof (Word));
1392 1713
1393 1714 if (error = core_write(vp, UIO_SYSSPACE, *offsetp, ¬e,
1394 1715 sizeof (note), rlimit, credp))
1395 1716 return (error);
1396 1717
1397 1718 *offsetp += sizeof (note);
1398 1719
1399 1720 if (error = core_write(vp, UIO_SYSSPACE, *offsetp, desc,
1400 1721 note.nhdr.n_descsz, rlimit, credp))
1401 1722 return (error);
1402 1723
1403 1724 *offsetp += note.nhdr.n_descsz;
1404 1725 return (0);
1405 1726 }
1406 1727
/*
 * Copy the section data from one vnode to the section of another vnode.
 *
 * Reads src->sh_size bytes starting at src->sh_offset from src_vp through
 * the scratch buffer buf (of capacity size) and appends them to dst_vp at
 * *doffset via core_write(), advancing *doffset on success.  On any read
 * or write failure -- including a read that makes no progress -- the
 * destination section is marked empty (sh_size = sh_offset = 0) and
 * *doffset is left unchanged; no error is returned to the caller.
 */
static void
copy_scn(Shdr *src, vnode_t *src_vp, Shdr *dst, vnode_t *dst_vp, Off *doffset,
    void *buf, size_t size, cred_t *credp, rlim64_t rlimit)
{
	ssize_t resid;
	size_t len, n = src->sh_size;	/* bytes still to copy */
	offset_t off = 0;		/* progress within the section */

	while (n != 0) {
		len = MIN(size, n);
		/*
		 * resid >= len means vn_rdwr() transferred nothing; bail
		 * rather than loop forever.  Partial reads (0 < resid < len)
		 * are fine: only len - resid bytes are written and the loop
		 * continues from where the read stopped.
		 */
		if (vn_rdwr(UIO_READ, src_vp, buf, len, src->sh_offset + off,
		    UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid) != 0 ||
		    resid >= len ||
		    core_write(dst_vp, UIO_SYSSPACE, *doffset + off,
		    buf, len - resid, rlimit, credp) != 0) {
			dst->sh_size = 0;
			dst->sh_offset = 0;
			return;
		}

		ASSERT(n >= len - resid);

		n -= len - resid;
		off += len - resid;
	}

	*doffset += src->sh_size;
}
1438 1759
#ifdef _ELF32_COMPAT
extern size_t elf_datasz_max;
#else
/*
 * Tunable cap on the scratch buffer used when copying section data into a
 * core file (see process_scns() below); the _ELF32_COMPAT build shares the
 * definition from the 64-bit compilation unit.
 */
size_t elf_datasz_max = 1 * 1024 * 1024;
#endif
1444 1765
/*
 * This function processes mappings that correspond to load objects to
 * examine their respective sections for elfcore(). It's called once with
 * v set to NULL to count the number of sections that we're going to need
 * and then again with v set to some allocated buffer that we fill in with
 * all the section data.
 *
 * In the counting pass (v == NULL) *nshdrsp is set to the number of
 * section headers needed (including the trailing .shstrtab slot); in the
 * fill pass the section data is also copied into the core file at
 * *doffsetp via copy_scn().  The caller holds the address space lock.
 */
static int
process_scns(core_content_t content, proc_t *p, cred_t *credp, vnode_t *vp,
    Shdr *v, int nv, rlim64_t rlimit, Off *doffsetp, int *nshdrsp)
{
	vnode_t *lastvp = NULL;
	struct seg *seg;
	int i, j;
	void *data = NULL;		/* bounce buffer, grown on demand */
	size_t datasz = 0;
	shstrtab_t shstrtab;
	struct as *as = p->p_as;
	int error = 0;

	if (v != NULL)
		shstrtab_init(&shstrtab);

	/* Section index 0 is reserved for the null section header. */
	i = 1;
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		uint_t prot;
		vnode_t *mvp;
		void *tmp = NULL;
		caddr_t saddr = seg->s_base;
		caddr_t naddr;
		caddr_t eaddr;
		size_t segsize;

		Ehdr ehdr;
		int nshdrs, shstrndx, nphdrs;
		caddr_t shbase;
		ssize_t shsize;
		char *shstrbase;
		ssize_t shstrsize;

		Shdr *shdr;
		const char *name;
		size_t sz;
		uintptr_t off;

		int ctf_ndx = 0;
		int symtab_ndx = 0;

		/*
		 * Since we're just looking for text segments of load
		 * objects, we only care about the protection bits; we don't
		 * care about the actual size of the segment so we use the
		 * reserved size. If the segment's size is zero, there's
		 * something fishy going on so we ignore this segment.
		 */
		if (seg->s_ops != &segvn_ops ||
		    SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
		    mvp == lastvp || mvp == NULL || mvp->v_type != VREG ||
		    (segsize = pr_getsegsize(seg, 1)) == 0)
			continue;

		eaddr = saddr + segsize;
		prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr);
		pr_getprot_done(&tmp);

		/*
		 * Skip this segment unless the protection bits look like
		 * what we'd expect for a text segment.
		 */
		if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)
			continue;

		if (getelfhead(mvp, credp, &ehdr, &nshdrs, &shstrndx,
		    &nphdrs) != 0 ||
		    getelfshdr(mvp, credp, &ehdr, nshdrs, shstrndx,
		    &shbase, &shsize, &shstrbase, &shstrsize) != 0)
			continue;

		/* Walk the object's section headers, skipping index 0. */
		off = ehdr.e_shentsize;
		for (j = 1; j < nshdrs; j++, off += ehdr.e_shentsize) {
			Shdr *symtab = NULL, *strtab;

			shdr = (Shdr *)(shbase + off);

			if (shdr->sh_name >= shstrsize)
				continue;

			name = shstrbase + shdr->sh_name;

			if (strcmp(name, shstrtab_data[STR_CTF]) == 0) {
				/* Dump at most one .SUNW_ctf per object. */
				if ((content & CC_CONTENT_CTF) == 0 ||
				    ctf_ndx != 0)
					continue;

				/* sh_link of CTF points at its symtab. */
				if (shdr->sh_link > 0 &&
				    shdr->sh_link < nshdrs) {
					symtab = (Shdr *)(shbase +
					    shdr->sh_link * ehdr.e_shentsize);
				}

				/* i < nv - 1: leave room for .shstrtab. */
				if (v != NULL && i < nv - 1) {
					if (shdr->sh_size > datasz &&
					    shdr->sh_size <= elf_datasz_max) {
						if (data != NULL)
							kmem_free(data, datasz);

						datasz = shdr->sh_size;
						data = kmem_alloc(datasz,
						    KM_SLEEP);
					}

					v[i].sh_name = shstrtab_ndx(&shstrtab,
					    STR_CTF);
					v[i].sh_addr = (Addr)(uintptr_t)saddr;
					v[i].sh_type = SHT_PROGBITS;
					v[i].sh_addralign = 4;
					*doffsetp = roundup(*doffsetp,
					    v[i].sh_addralign);
					v[i].sh_offset = *doffsetp;
					v[i].sh_size = shdr->sh_size;
					if (symtab == NULL) {
						v[i].sh_link = 0;
					} else if (symtab->sh_type ==
					    SHT_SYMTAB &&
					    symtab_ndx != 0) {
						/* symtab already dumped */
						v[i].sh_link =
						    symtab_ndx;
					} else {
						/* symtab dumped next */
						v[i].sh_link = i + 1;
					}

					copy_scn(shdr, mvp, &v[i], vp,
					    doffsetp, data, datasz, credp,
					    rlimit);
				}

				ctf_ndx = i++;

				/*
				 * We've already dumped the symtab.
				 */
				if (symtab != NULL &&
				    symtab->sh_type == SHT_SYMTAB &&
				    symtab_ndx != 0)
					continue;

			} else if (strcmp(name,
			    shstrtab_data[STR_SYMTAB]) == 0) {
				/*
				 * NOTE(review): symtab was initialized to
				 * NULL at the top of this iteration, so the
				 * "symtab != 0" guard here can never fire;
				 * presumably "symtab_ndx != 0" was intended
				 * to avoid dumping a second symtab — confirm
				 * against upstream illumos.
				 */
				if ((content & CC_CONTENT_SYMTAB) == 0 ||
				    symtab != 0)
					continue;

				symtab = shdr;
			}

			/*
			 * Dump the symbol table (found either directly or
			 * via a CTF section's sh_link) along with its
			 * associated string table as a pair of sections.
			 */
			if (symtab != NULL) {
				if ((symtab->sh_type != SHT_DYNSYM &&
				    symtab->sh_type != SHT_SYMTAB) ||
				    symtab->sh_link == 0 ||
				    symtab->sh_link >= nshdrs)
					continue;

				strtab = (Shdr *)(shbase +
				    symtab->sh_link * ehdr.e_shentsize);

				if (strtab->sh_type != SHT_STRTAB)
					continue;

				/* i < nv - 2: two slots plus .shstrtab. */
				if (v != NULL && i < nv - 2) {
					sz = MAX(symtab->sh_size,
					    strtab->sh_size);
					if (sz > datasz &&
					    sz <= elf_datasz_max) {
						if (data != NULL)
							kmem_free(data, datasz);

						datasz = sz;
						data = kmem_alloc(datasz,
						    KM_SLEEP);
					}

					if (symtab->sh_type == SHT_DYNSYM) {
						v[i].sh_name = shstrtab_ndx(
						    &shstrtab, STR_DYNSYM);
						v[i + 1].sh_name = shstrtab_ndx(
						    &shstrtab, STR_DYNSTR);
					} else {
						v[i].sh_name = shstrtab_ndx(
						    &shstrtab, STR_SYMTAB);
						v[i + 1].sh_name = shstrtab_ndx(
						    &shstrtab, STR_STRTAB);
					}

					v[i].sh_type = symtab->sh_type;
					v[i].sh_addr = symtab->sh_addr;
					/* Relocate addresses for PIC. */
					if (ehdr.e_type == ET_DYN ||
					    v[i].sh_addr == 0)
						v[i].sh_addr +=
						    (Addr)(uintptr_t)saddr;
					v[i].sh_addralign =
					    symtab->sh_addralign;
					*doffsetp = roundup(*doffsetp,
					    v[i].sh_addralign);
					v[i].sh_offset = *doffsetp;
					v[i].sh_size = symtab->sh_size;
					v[i].sh_link = i + 1;
					v[i].sh_entsize = symtab->sh_entsize;
					v[i].sh_info = symtab->sh_info;

					copy_scn(symtab, mvp, &v[i], vp,
					    doffsetp, data, datasz, credp,
					    rlimit);

					v[i + 1].sh_type = SHT_STRTAB;
					v[i + 1].sh_flags = SHF_STRINGS;
					v[i + 1].sh_addr = symtab->sh_addr;
					if (ehdr.e_type == ET_DYN ||
					    v[i + 1].sh_addr == 0)
						v[i + 1].sh_addr +=
						    (Addr)(uintptr_t)saddr;
					v[i + 1].sh_addralign =
					    strtab->sh_addralign;
					*doffsetp = roundup(*doffsetp,
					    v[i + 1].sh_addralign);
					v[i + 1].sh_offset = *doffsetp;
					v[i + 1].sh_size = strtab->sh_size;

					copy_scn(strtab, mvp, &v[i + 1], vp,
					    doffsetp, data, datasz, credp,
					    rlimit);
				}

				if (symtab->sh_type == SHT_SYMTAB)
					symtab_ndx = i;
				i += 2;
			}
		}

		kmem_free(shstrbase, shstrsize);
		kmem_free(shbase, shsize);

		lastvp = mvp;
	}

	/* Counting pass: report how many section headers are needed. */
	if (v == NULL) {
		if (i == 1)
			*nshdrsp = 0;
		else
			*nshdrsp = i + 1;	/* +1 for .shstrtab */
		goto done;
	}

	/* The address space changed between the two passes. */
	if (i != nv - 1) {
		cmn_err(CE_WARN, "elfcore: core dump failed for "
		    "process %d; address space is changing", p->p_pid);
		error = EIO;
		goto done;
	}

	/* The final section is the section-name string table itself. */
	v[i].sh_name = shstrtab_ndx(&shstrtab, STR_SHSTRTAB);
	v[i].sh_size = shstrtab_size(&shstrtab);
	v[i].sh_addralign = 1;
	*doffsetp = roundup(*doffsetp, v[i].sh_addralign);
	v[i].sh_offset = *doffsetp;
	v[i].sh_flags = SHF_STRINGS;
	v[i].sh_type = SHT_STRTAB;

	if (v[i].sh_size > datasz) {
		if (data != NULL)
			kmem_free(data, datasz);

		datasz = v[i].sh_size;
		data = kmem_alloc(datasz,
		    KM_SLEEP);
	}

	shstrtab_dump(&shstrtab, data);

	if ((error = core_write(vp, UIO_SYSSPACE, *doffsetp,
	    data, v[i].sh_size, rlimit, credp)) != 0)
		goto done;

	*doffsetp += v[i].sh_size;

done:
	if (data != NULL)
		kmem_free(data, datasz);

	return (error);
}
1735 2056
/*
 * Write an ELF core file for process p to vnode vp, honoring the core
 * content mask and resource limit.  Layout: ELF header, program headers
 * (two CORE note segments plus one PT_LOAD per mapping), optional
 * section headers, note data, then segment data.  If the address space
 * changes while we're counting mappings, the whole layout is retried
 * once before giving up with EIO.
 */
int
elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig,
    core_content_t content)
{
	offset_t poffset, soffset;
	Off doffset;
	int error, i, nphdrs, nshdrs;
	int overflow = 0;
	struct seg *seg;
	struct as *as = p->p_as;
	union {
		Ehdr ehdr;
		Phdr phdr[1];
		Shdr shdr[1];
	} *bigwad;	/* one allocation reused for ehdr, phdrs, shdrs */
	size_t bigsize;
	size_t phdrsz, shdrsz;
	Ehdr *ehdr;
	Phdr *v;
	caddr_t brkbase;
	size_t brksize;
	caddr_t stkbase;
	size_t stksize;
	int ntries = 0;
	klwp_t *lwp = ttolwp(curthread);

top:
	/*
	 * Make sure we have everything we need (registers, etc.).
	 * All other lwps have already stopped and are in an orderly state.
	 */
	ASSERT(p == ttoproc(curthread));
	prstop(0, 0);

	AS_LOCK_ENTER(as, RW_WRITER);
	nphdrs = prnsegs(as, 0) + 2;		/* two CORE note sections */

	/*
	 * Count the number of section headers we're going to need.
	 */
	nshdrs = 0;
	if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) {
		(void) process_scns(content, p, credp, NULL, NULL, NULL, 0,
		    NULL, &nshdrs);
	}
	AS_LOCK_EXIT(as);

	/* Either no sections, or at least one real one plus .shstrtab. */
	ASSERT(nshdrs == 0 || nshdrs > 1);

	/*
	 * The core file contents may require zero section headers, but if
	 * we overflow the 16 bits allotted to the program header count in
	 * the ELF header, we'll need that program header at index zero.
	 */
	if (nshdrs == 0 && nphdrs >= PN_XNUM)
		nshdrs = 1;

	phdrsz = nphdrs * sizeof (Phdr);
	shdrsz = nshdrs * sizeof (Shdr);

	bigsize = MAX(sizeof (*bigwad), MAX(phdrsz, shdrsz));
	bigwad = kmem_alloc(bigsize, KM_SLEEP);

	ehdr = &bigwad->ehdr;
	bzero(ehdr, sizeof (*ehdr));

	ehdr->e_ident[EI_MAG0] = ELFMAG0;
	ehdr->e_ident[EI_MAG1] = ELFMAG1;
	ehdr->e_ident[EI_MAG2] = ELFMAG2;
	ehdr->e_ident[EI_MAG3] = ELFMAG3;
	ehdr->e_ident[EI_CLASS] = ELFCLASS;
	ehdr->e_type = ET_CORE;

#if !defined(_LP64) || defined(_ELF32_COMPAT)

#if defined(__sparc)
	ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
	ehdr->e_machine = EM_SPARC;
#elif defined(__i386) || defined(__i386_COMPAT)
	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
	ehdr->e_machine = EM_386;
#else
#error "no recognized machine type is defined"
#endif

#else	/* !defined(_LP64) || defined(_ELF32_COMPAT) */

#if defined(__sparc)
	ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
	ehdr->e_machine = EM_SPARCV9;
#elif defined(__amd64)
	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
	ehdr->e_machine = EM_AMD64;
#else
#error "no recognized 64-bit machine type is defined"
#endif

#endif	/* !defined(_LP64) || defined(_ELF32_COMPAT) */

	/*
	 * If the count of program headers or section headers or the index
	 * of the section string table can't fit in the mere 16 bits
	 * shortsightedly allotted to them in the ELF header, we use the
	 * extended formats and put the real values in the section header
	 * as index 0.
	 */
	ehdr->e_version = EV_CURRENT;
	ehdr->e_ehsize = sizeof (Ehdr);

	if (nphdrs >= PN_XNUM)
		ehdr->e_phnum = PN_XNUM;
	else
		ehdr->e_phnum = (unsigned short)nphdrs;

	ehdr->e_phoff = sizeof (Ehdr);
	ehdr->e_phentsize = sizeof (Phdr);

	if (nshdrs > 0) {
		if (nshdrs >= SHN_LORESERVE)
			ehdr->e_shnum = 0;
		else
			ehdr->e_shnum = (unsigned short)nshdrs;

		if (nshdrs - 1 >= SHN_LORESERVE)
			ehdr->e_shstrndx = SHN_XINDEX;
		else
			ehdr->e_shstrndx = (unsigned short)(nshdrs - 1);

		ehdr->e_shoff = ehdr->e_phoff + ehdr->e_phentsize * nphdrs;
		ehdr->e_shentsize = sizeof (Shdr);
	}

	if (error = core_write(vp, UIO_SYSSPACE, (offset_t)0, ehdr,
	    sizeof (Ehdr), rlimit, credp))
		goto done;

	/* File offsets of the phdr table, shdr table, and data area. */
	poffset = sizeof (Ehdr);
	soffset = sizeof (Ehdr) + phdrsz;
	doffset = sizeof (Ehdr) + phdrsz + shdrsz;

	v = &bigwad->phdr[0];
	bzero(v, phdrsz);

	/* Program headers 0 and 1 are the old- and new-style note sets. */
	setup_old_note_header(&v[0], p);
	v[0].p_offset = doffset = roundup(doffset, sizeof (Word));
	doffset += v[0].p_filesz;

	setup_note_header(&v[1], p);
	v[1].p_offset = doffset = roundup(doffset, sizeof (Word));
	doffset += v[1].p_filesz;

	mutex_enter(&p->p_lock);

	brkbase = p->p_brkbase;
	brksize = p->p_brksize;

	stkbase = p->p_usrstack - p->p_stksize;
	stksize = p->p_stksize;

	mutex_exit(&p->p_lock);

	AS_LOCK_ENTER(as, RW_WRITER);
	i = 2;
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		extern struct seg_ops segspt_shmops;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			uint_t prot;
			size_t size;
			int type;
			vnode_t *mvp;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
			if ((size = (size_t)(naddr - saddr)) == 0)
				continue;
			if (i == nphdrs) {
				/* more mappings than counted; retry below */
				overflow++;
				continue;
			}
			v[i].p_type = PT_LOAD;
			v[i].p_vaddr = (Addr)(uintptr_t)saddr;
			v[i].p_memsz = size;
			if (prot & PROT_READ)
				v[i].p_flags |= PF_R;
			if (prot & PROT_WRITE)
				v[i].p_flags |= PF_W;
			if (prot & PROT_EXEC)
				v[i].p_flags |= PF_X;

			/*
			 * Figure out which mappings to include in the core.
			 */
			type = SEGOP_GETTYPE(seg, saddr);

			if (saddr == stkbase && size == stksize) {
				if (!(content & CC_CONTENT_STACK))
					goto exclude;

			} else if (saddr == brkbase && size == brksize) {
				if (!(content & CC_CONTENT_HEAP))
					goto exclude;

			} else if (seg->s_ops == &segspt_shmops) {
				if (type & MAP_NORESERVE) {
					if (!(content & CC_CONTENT_DISM))
						goto exclude;
				} else {
					if (!(content & CC_CONTENT_ISM))
						goto exclude;
				}

			} else if (seg->s_ops != &segvn_ops) {
				goto exclude;

			} else if (type & MAP_SHARED) {
				if (shmgetid(p, saddr) != SHMID_NONE) {
					if (!(content & CC_CONTENT_SHM))
						goto exclude;

				} else if (SEGOP_GETVP(seg, seg->s_base,
				    &mvp) != 0 || mvp == NULL ||
				    mvp->v_type != VREG) {
					if (!(content & CC_CONTENT_SHANON))
						goto exclude;

				} else {
					if (!(content & CC_CONTENT_SHFILE))
						goto exclude;
				}

			} else if (SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
			    mvp == NULL || mvp->v_type != VREG) {
				if (!(content & CC_CONTENT_ANON))
					goto exclude;

			} else if (prot == (PROT_READ | PROT_EXEC)) {
				if (!(content & CC_CONTENT_TEXT))
					goto exclude;

			} else if (prot == PROT_READ) {
				if (!(content & CC_CONTENT_RODATA))
					goto exclude;

			} else {
				if (!(content & CC_CONTENT_DATA))
					goto exclude;
			}

			doffset = roundup(doffset, sizeof (Word));
			v[i].p_offset = doffset;
			v[i].p_filesz = size;
			doffset += size;
exclude:
			/* excluded mappings keep p_filesz == 0 */
			i++;
		}
		ASSERT(tmp == NULL);
	}
	AS_LOCK_EXIT(as);

	/*
	 * The mapping count changed underneath us; redo the layout once
	 * from the top before declaring failure.
	 */
	if (overflow || i != nphdrs) {
		if (ntries++ == 0) {
			kmem_free(bigwad, bigsize);
			overflow = 0;
			goto top;
		}
		cmn_err(CE_WARN, "elfcore: core dump failed for "
		    "process %d; address space is changing", p->p_pid);
		error = EIO;
		goto done;
	}

	if ((error = core_write(vp, UIO_SYSSPACE, poffset,
	    v, phdrsz, rlimit, credp)) != 0)
		goto done;

	if ((error = write_old_elfnotes(p, sig, vp, v[0].p_offset, rlimit,
	    credp)) != 0)
		goto done;

	if ((error = write_elfnotes(p, sig, vp, v[1].p_offset, rlimit,
	    credp, content)) != 0)
		goto done;

	/* Dump the data for each included PT_LOAD mapping. */
	for (i = 2; i < nphdrs; i++) {
		prkillinfo_t killinfo;
		sigqueue_t *sq;
		/*
		 * NOTE(review): this inner "sig" shadows the function
		 * parameter of the same name for the rest of the loop.
		 */
		int sig, j;

		if (v[i].p_filesz == 0)
			continue;

		/*
		 * If dumping out this segment fails, rather than failing
		 * the core dump entirely, we reset the size of the mapping
		 * to zero to indicate that the data is absent from the core
		 * file and or in the PF_SUNW_FAILURE flag to differentiate
		 * this from mappings that were excluded due to the core file
		 * content settings.
		 */
		if ((error = core_seg(p, vp, v[i].p_offset,
		    (caddr_t)(uintptr_t)v[i].p_vaddr, v[i].p_filesz,
		    rlimit, credp)) == 0) {
			continue;
		}

		if ((sig = lwp->lwp_cursig) == 0) {
			/*
			 * We failed due to something other than a signal.
			 * Since the space reserved for the segment is now
			 * unused, we stash the errno in the first four
			 * bytes. This undocumented interface will let us
			 * understand the nature of the failure.
			 */
			(void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
			    &error, sizeof (error), rlimit, credp);

			v[i].p_filesz = 0;
			v[i].p_flags |= PF_SUNW_FAILURE;
			if ((error = core_write(vp, UIO_SYSSPACE,
			    poffset + sizeof (v[i]) * i, &v[i], sizeof (v[i]),
			    rlimit, credp)) != 0)
				goto done;

			continue;
		}

		/*
		 * We took a signal. We want to abort the dump entirely, but
		 * we also want to indicate what failed and why. We therefore
		 * use the space reserved for the first failing segment to
		 * write our error (which, for purposes of compatability with
		 * older core dump readers, we set to EINTR) followed by any
		 * siginfo associated with the signal.
		 */
		bzero(&killinfo, sizeof (killinfo));
		killinfo.prk_error = EINTR;

		sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo;

		if (sq != NULL) {
			bcopy(&sq->sq_info, &killinfo.prk_info,
			    sizeof (sq->sq_info));
		} else {
			killinfo.prk_info.si_signo = lwp->lwp_cursig;
			killinfo.prk_info.si_code = SI_NOINFO;
		}

#if (defined(_SYSCALL32_IMPL) || defined(_LP64))
		/*
		 * If this is a 32-bit process, we need to translate from the
		 * native siginfo to the 32-bit variant. (Core readers must
		 * always have the same data model as their target or must
		 * be aware of -- and compensate for -- data model differences.)
		 */
		if (curproc->p_model == DATAMODEL_ILP32) {
			siginfo32_t si32;

			siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32);
			bcopy(&si32, &killinfo.prk_info, sizeof (si32));
		}
#endif

		(void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
		    &killinfo, sizeof (killinfo), rlimit, credp);

		/*
		 * For the segment on which we took the signal, indicate that
		 * its data now refers to a siginfo.
		 */
		v[i].p_filesz = 0;
		v[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED |
		    PF_SUNW_SIGINFO;

		/*
		 * And for every other segment, indicate that its absence
		 * is due to a signal.
		 */
		for (j = i + 1; j < nphdrs; j++) {
			v[j].p_filesz = 0;
			v[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED;
		}

		/*
		 * Finally, write out our modified program headers.
		 */
		if ((error = core_write(vp, UIO_SYSSPACE,
		    poffset + sizeof (v[i]) * i, &v[i],
		    sizeof (v[i]) * (nphdrs - i), rlimit, credp)) != 0)
			goto done;

		break;
	}

	if (nshdrs > 0) {
		bzero(&bigwad->shdr[0], shdrsz);

		/*
		 * Section header 0 carries the extended counts when the
		 * real values overflow their 16-bit ELF header fields.
		 */
		if (nshdrs >= SHN_LORESERVE)
			bigwad->shdr[0].sh_size = nshdrs;

		if (nshdrs - 1 >= SHN_LORESERVE)
			bigwad->shdr[0].sh_link = nshdrs - 1;

		if (nphdrs >= PN_XNUM)
			bigwad->shdr[0].sh_info = nphdrs;

		if (nshdrs > 1) {
			AS_LOCK_ENTER(as, RW_WRITER);
			if ((error = process_scns(content, p, credp, vp,
			    &bigwad->shdr[0], nshdrs, rlimit, &doffset,
			    NULL)) != 0) {
				AS_LOCK_EXIT(as);
				goto done;
			}
			AS_LOCK_EXIT(as);
		}

		if ((error = core_write(vp, UIO_SYSSPACE, soffset,
		    &bigwad->shdr[0], shdrsz, rlimit, credp)) != 0)
			goto done;
	}

done:
	kmem_free(bigwad, bigsize);
	return (error);
}
2165 2486
2166 2487 #ifndef _ELF32_COMPAT
2167 2488
/*
 * execsw entry for native ELF executables: magic string, magic-string
 * offset, number of significant magic bytes, exec handler, core handler.
 */
static struct execsw esw = {
#ifdef	_LP64
	elf64magicstr,
#else	/* _LP64 */
	elf32magicstr,
#endif	/* _LP64 */
	0,
	5,
	elfexec,
	elfcore
};
2179 2500
/* Linkage structure registering the native ELF execsw entry. */
static struct modlexec modlexec = {
	&mod_execops, "exec module for elf", &esw
};
2183 2504
#ifdef _LP64
/*
 * 32-bit variants built from the _ELF32_COMPAT compilation of this file,
 * registered below so a 64-bit kernel can exec 32-bit ELF binaries.
 */
extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
	intpdata_t *idatap, int level, long *execsz,
	int setid, caddr_t exec_file, cred_t *cred,
	int *brand_action);
extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
	rlim64_t rlimit, int sig, core_content_t content);
/* execsw entry for 32-bit ELF executables on a 64-bit kernel. */
static struct execsw esw32 = {
	elf32magicstr,
	0,
	5,
	elf32exec,
	elf32core
};
2199 2520
/* Linkage structure registering the 32-bit compat execsw entry. */
static struct modlexec modlexec32 = {
	&mod_execops, "32-bit exec module for elf", &esw32
};
#endif	/* _LP64 */
2204 2525
/*
 * Module linkage: the native exec module plus, on 64-bit kernels, the
 * 32-bit compat exec module.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlexec,
#ifdef _LP64
	(void *)&modlexec32,
#endif	/* _LP64 */
	NULL
};
2213 2534
/* Install the ELF exec module(s) into the running kernel. */
int
_init(void)
{
	return (mod_install(&modlinkage));
}
2219 2540
/* Remove the ELF exec module(s); fails if the module is busy. */
int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
2225 2546
/* Report module information for modinfo(8). */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
2231 2552
2232 2553 #endif /* !_ELF32_COMPAT */
|
↓ open down ↓ |
34 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX