Print this page
13925 core files should include DWARF (fix mismerge)
13925 core files should include DWARF
Reviewed by: Rich Lowe <richlowe@richlowe.net>
Reviewed by: C Fraire <cfraire@me.com>
Reviewed by: Adam Leventhal <adam.leventhal@gmail.com>
Approved by: Dan McDonald <danmcd@joyent.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/exec/elf/elf.c
+++ new/usr/src/uts/common/exec/elf/elf.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28 /*
29 29 * Copyright 2019 Joyent, Inc.
30 30 * Copyright 2021 Oxide Computer Company
31 31 */
32 32
33 33 #include <sys/types.h>
34 34 #include <sys/param.h>
35 35 #include <sys/thread.h>
36 36 #include <sys/sysmacros.h>
37 37 #include <sys/signal.h>
38 38 #include <sys/cred.h>
39 39 #include <sys/user.h>
40 40 #include <sys/errno.h>
41 41 #include <sys/vnode.h>
42 42 #include <sys/mman.h>
43 43 #include <sys/kmem.h>
44 44 #include <sys/proc.h>
45 45 #include <sys/pathname.h>
46 46 #include <sys/policy.h>
47 47 #include <sys/cmn_err.h>
48 48 #include <sys/systm.h>
49 49 #include <sys/elf.h>
50 50 #include <sys/vmsystm.h>
51 51 #include <sys/debug.h>
52 52 #include <sys/auxv.h>
53 53 #include <sys/exec.h>
54 54 #include <sys/prsystm.h>
55 55 #include <vm/as.h>
56 56 #include <vm/rm.h>
57 57 #include <vm/seg.h>
58 58 #include <vm/seg_vn.h>
59 59 #include <sys/modctl.h>
60 60 #include <sys/systeminfo.h>
61 61 #include <sys/vmparam.h>
|
↓ open down ↓ |
61 lines elided |
↑ open up ↑ |
62 62 #include <sys/machelf.h>
63 63 #include <sys/shm_impl.h>
64 64 #include <sys/archsystm.h>
65 65 #include <sys/fasttrap.h>
66 66 #include <sys/brand.h>
67 67 #include "elf_impl.h"
68 68 #include <sys/sdt.h>
69 69 #include <sys/siginfo.h>
70 70 #include <sys/random.h>
71 71
72 +#include <core_shstrtab.h>
73 +
72 74 #if defined(__x86)
73 75 #include <sys/comm_page_util.h>
74 76 #include <sys/fp.h>
75 77 #endif /* defined(__x86) */
76 78
77 79
78 80 extern int at_flags;
79 81 extern volatile size_t aslr_max_brk_skew;
80 82
81 83 #define ORIGIN_STR "ORIGIN"
82 84 #define ORIGIN_STR_SIZE 6
83 85
84 86 static int getelfhead(vnode_t *, cred_t *, Ehdr *, uint_t *, uint_t *,
85 87 uint_t *);
86 88 static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, uint_t, caddr_t *,
87 89 size_t *);
88 90 static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, uint_t, uint_t,
89 91 caddr_t *, size_t *, caddr_t *, size_t *);
90 92 static size_t elfsize(const Ehdr *, uint_t, const caddr_t, uintptr_t *);
91 93 static int mapelfexec(vnode_t *, Ehdr *, uint_t, caddr_t, Phdr **, Phdr **,
92 94 Phdr **, Phdr **, Phdr *, caddr_t *, caddr_t *, intptr_t *, uintptr_t *,
93 95 size_t, size_t *, size_t *);
94 96
95 97 #ifdef _ELF32_COMPAT
96 98 /* Link against the non-compat instances when compiling the 32-bit version. */
97 99 extern size_t elf_datasz_max;
98 100 extern size_t elf_zeropg_sz;
99 101 extern void elf_ctx_resize_scratch(elf_core_ctx_t *, size_t);
100 102 extern uint_t elf_nphdr_max;
|
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
101 103 extern uint_t elf_nshdr_max;
102 104 extern size_t elf_shstrtab_max;
103 105 #else
104 106 size_t elf_datasz_max = 1 * 1024 * 1024;
105 107 size_t elf_zeropg_sz = 4 * 1024;
106 108 uint_t elf_nphdr_max = 1000;
107 109 uint_t elf_nshdr_max = 10000;
108 110 size_t elf_shstrtab_max = 100 * 1024;
109 111 #endif
110 112
111 -
112 -
113 -typedef enum {
114 - STR_CTF,
115 - STR_SYMTAB,
116 - STR_DYNSYM,
117 - STR_STRTAB,
118 - STR_DYNSTR,
119 - STR_SHSTRTAB,
120 - STR_NUM
121 -} shstrtype_t;
122 -
123 -static const char *shstrtab_data[] = {
124 - ".SUNW_ctf",
125 - ".symtab",
126 - ".dynsym",
127 - ".strtab",
128 - ".dynstr",
129 - ".shstrtab"
130 -};
131 -
132 -typedef struct shstrtab {
133 - uint_t sst_ndx[STR_NUM];
134 - uint_t sst_cur;
135 -} shstrtab_t;
136 -
137 -static void
138 -shstrtab_init(shstrtab_t *s)
139 -{
140 - bzero(&s->sst_ndx, sizeof (s->sst_ndx));
141 - s->sst_cur = 1;
142 -}
143 -
144 -static uint_t
145 -shstrtab_ndx(shstrtab_t *s, shstrtype_t type)
146 -{
147 - uint_t ret;
148 -
149 - if ((ret = s->sst_ndx[type]) != 0)
150 - return (ret);
151 -
152 - ret = s->sst_ndx[type] = s->sst_cur;
153 - s->sst_cur += strlen(shstrtab_data[type]) + 1;
154 -
155 - return (ret);
156 -}
157 -
158 -static size_t
159 -shstrtab_size(const shstrtab_t *s)
160 -{
161 - return (s->sst_cur);
162 -}
163 -
164 -static void
165 -shstrtab_dump(const shstrtab_t *s, char *buf)
166 -{
167 - uint_t i, ndx;
168 -
169 - *buf = '\0';
170 - for (i = 0; i < STR_NUM; i++) {
171 - if ((ndx = s->sst_ndx[i]) != 0)
172 - (void) strcpy(buf + ndx, shstrtab_data[i]);
173 - }
174 -}
175 -
176 113 static int
177 114 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
178 115 {
179 116 ASSERT(phdrp->p_type == PT_SUNWDTRACE);
180 117
181 118 /*
182 119 * See the comment in fasttrap.h for information on how to safely
183 120 * update this program header.
184 121 */
185 122 if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
186 123 (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
187 124 return (-1);
188 125
189 126 args->thrptr = phdrp->p_vaddr + base;
190 127
191 128 return (0);
192 129 }
193 130
194 131 static int
195 132 handle_secflag_dt(proc_t *p, uint_t dt, uint_t val)
196 133 {
197 134 uint_t flag;
198 135
199 136 switch (dt) {
200 137 case DT_SUNW_ASLR:
201 138 flag = PROC_SEC_ASLR;
202 139 break;
203 140 default:
204 141 return (EINVAL);
205 142 }
206 143
207 144 if (val == 0) {
208 145 if (secflag_isset(p->p_secflags.psf_lower, flag))
209 146 return (EPERM);
210 147 if ((secpolicy_psecflags(CRED(), p, p) != 0) &&
211 148 secflag_isset(p->p_secflags.psf_inherit, flag))
212 149 return (EPERM);
213 150
214 151 secflag_clear(&p->p_secflags.psf_effective, flag);
215 152 } else {
216 153 if (!secflag_isset(p->p_secflags.psf_upper, flag))
217 154 return (EPERM);
218 155
219 156 if ((secpolicy_psecflags(CRED(), p, p) != 0) &&
220 157 !secflag_isset(p->p_secflags.psf_inherit, flag))
221 158 return (EPERM);
222 159
223 160 secflag_set(&p->p_secflags.psf_effective, flag);
224 161 }
225 162
226 163 return (0);
227 164 }
228 165
229 166
230 167 #ifndef _ELF32_COMPAT
231 168 void
232 169 elf_ctx_resize_scratch(elf_core_ctx_t *ctx, size_t sz)
233 170 {
234 171 size_t target = MIN(sz, elf_datasz_max);
235 172
236 173 if (target > ctx->ecc_bufsz) {
237 174 if (ctx->ecc_buf != NULL) {
238 175 kmem_free(ctx->ecc_buf, ctx->ecc_bufsz);
239 176 }
240 177 ctx->ecc_buf = kmem_alloc(target, KM_SLEEP);
241 178 ctx->ecc_bufsz = target;
242 179 }
243 180 }
244 181 #endif /* _ELF32_COMPAT */
245 182
246 183 /*
247 184 * Map in the executable pointed to by vp. Returns 0 on success. Note that
248 185 * this function currently has the maximum number of arguments allowed by
249 186 * modstubs on x86 (MAXNARG)! Do _not_ add to this function signature without
250 187 * adding to MAXNARG. (Better yet, do not add to this monster of a function
251 188 * signature!)
252 189 */
253 190 int
254 191 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
255 192 intptr_t *voffset, caddr_t exec_file, char **interpp, caddr_t *bssbase,
256 193 caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap, uintptr_t *minaddrp)
257 194 {
258 195 size_t len, phdrsize;
259 196 struct vattr vat;
260 197 caddr_t phdrbase = NULL;
261 198 uint_t nshdrs, shstrndx, nphdrs;
262 199 int error = 0;
263 200 Phdr *uphdr = NULL;
264 201 Phdr *junk = NULL;
265 202 Phdr *dynphdr = NULL;
266 203 Phdr *dtrphdr = NULL;
267 204 char *interp = NULL;
268 205 uintptr_t lddata, minaddr;
269 206 size_t execsz;
270 207
271 208 if (lddatap != NULL)
272 209 *lddatap = 0;
273 210
274 211 if (minaddrp != NULL)
275 212 *minaddrp = (uintptr_t)NULL;
276 213
277 214 if (error = execpermissions(vp, &vat, args)) {
278 215 uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
279 216 return (error);
280 217 }
281 218
282 219 if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
283 220 &nphdrs)) != 0 ||
284 221 (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
285 222 &phdrsize)) != 0) {
286 223 uprintf("%s: Cannot read %s\n", exec_file, args->pathname);
287 224 return (error);
288 225 }
289 226
290 227 if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
291 228 uprintf("%s: Nothing to load in %s", exec_file, args->pathname);
292 229 kmem_free(phdrbase, phdrsize);
293 230 return (ENOEXEC);
294 231 }
295 232 if (lddatap != NULL)
296 233 *lddatap = lddata;
297 234
298 235 if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
299 236 &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
300 237 len, &execsz, brksize)) {
301 238 uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
302 239 if (uphdr != NULL && uphdr->p_flags == 0)
303 240 kmem_free(uphdr, sizeof (Phdr));
304 241 kmem_free(phdrbase, phdrsize);
305 242 return (error);
306 243 }
307 244
308 245 if (minaddrp != NULL)
309 246 *minaddrp = minaddr;
310 247
311 248 /*
312 249 * If the executable requires an interpreter, determine its name.
313 250 */
314 251 if (dynphdr != NULL) {
315 252 ssize_t resid;
316 253
317 254 if (dynphdr->p_filesz > MAXPATHLEN || dynphdr->p_filesz == 0) {
318 255 uprintf("%s: Invalid interpreter\n", exec_file);
319 256 kmem_free(phdrbase, phdrsize);
320 257 return (ENOEXEC);
321 258 }
322 259
323 260 interp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
324 261
325 262 if ((error = vn_rdwr(UIO_READ, vp, interp,
326 263 (ssize_t)dynphdr->p_filesz,
327 264 (offset_t)dynphdr->p_offset, UIO_SYSSPACE, 0,
328 265 (rlim64_t)0, CRED(), &resid)) != 0 || resid != 0 ||
329 266 interp[dynphdr->p_filesz - 1] != '\0') {
330 267 uprintf("%s: Cannot obtain interpreter pathname\n",
331 268 exec_file);
332 269 kmem_free(interp, MAXPATHLEN);
333 270 kmem_free(phdrbase, phdrsize);
334 271 return (error != 0 ? error : ENOEXEC);
335 272 }
336 273 }
337 274
338 275 /*
339 276 * If this is a statically linked executable, voffset should indicate
340 277 * the address of the executable itself (it normally holds the address
341 278 * of the interpreter).
342 279 */
343 280 if (ehdr->e_type == ET_EXEC && interp == NULL)
344 281 *voffset = minaddr;
345 282
346 283 /*
347 284 * If the caller has asked for the interpreter name, return it (it's
348 285 * up to the caller to free it); if the caller hasn't asked for it,
349 286 * free it ourselves.
350 287 */
351 288 if (interpp != NULL) {
352 289 *interpp = interp;
353 290 } else if (interp != NULL) {
354 291 kmem_free(interp, MAXPATHLEN);
355 292 }
356 293
357 294 if (uphdr != NULL) {
358 295 *uphdr_vaddr = uphdr->p_vaddr;
359 296
360 297 if (uphdr->p_flags == 0)
361 298 kmem_free(uphdr, sizeof (Phdr));
362 299 } else if (ehdr->e_type == ET_DYN) {
363 300 /*
364 301 * If we don't have a uphdr, we'll apply the logic found
365 302 * in mapelfexec() and use the p_vaddr of the first PT_LOAD
366 303 * section as the base address of the object.
367 304 */
368 305 const Phdr *phdr = (Phdr *)phdrbase;
369 306 const uint_t hsize = ehdr->e_phentsize;
370 307 uint_t i;
371 308
372 309 for (i = nphdrs; i > 0; i--) {
373 310 if (phdr->p_type == PT_LOAD) {
374 311 *uphdr_vaddr = (uintptr_t)phdr->p_vaddr +
375 312 ehdr->e_phoff;
376 313 break;
377 314 }
378 315
379 316 phdr = (Phdr *)((caddr_t)phdr + hsize);
380 317 }
381 318
382 319 /*
383 320 * If we don't have a PT_LOAD segment, we should have returned
384 321 * ENOEXEC when elfsize() returned 0, above.
385 322 */
386 323 VERIFY(i > 0);
387 324 } else {
388 325 *uphdr_vaddr = (Addr)-1;
389 326 }
390 327
391 328 kmem_free(phdrbase, phdrsize);
392 329 return (error);
393 330 }
394 331
395 332 /*ARGSUSED*/
396 333 int
397 334 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
398 335 int level, size_t *execsz, int setid, caddr_t exec_file, cred_t *cred,
399 336 int *brand_action)
400 337 {
401 338 caddr_t phdrbase = NULL;
402 339 caddr_t bssbase = 0;
403 340 caddr_t brkbase = 0;
404 341 size_t brksize = 0;
405 342 size_t dlnsize, nsize = 0;
406 343 aux_entry_t *aux;
407 344 int error;
408 345 ssize_t resid;
409 346 int fd = -1;
410 347 intptr_t voffset;
411 348 Phdr *intphdr = NULL;
412 349 Phdr *dynamicphdr = NULL;
413 350 Phdr *stphdr = NULL;
414 351 Phdr *uphdr = NULL;
415 352 Phdr *junk = NULL;
416 353 size_t len;
417 354 size_t postfixsize = 0;
418 355 size_t i;
419 356 Phdr *phdrp;
420 357 Phdr *dataphdrp = NULL;
421 358 Phdr *dtrphdr;
422 359 Phdr *capphdr = NULL;
423 360 Cap *cap = NULL;
424 361 size_t capsize;
425 362 int hasu = 0;
426 363 int hasauxv = 0;
427 364 int hasintp = 0;
428 365 int branded = 0;
429 366 int dynuphdr = 0;
430 367
431 368 struct proc *p = ttoproc(curthread);
432 369 struct user *up = PTOU(p);
433 370 struct bigwad {
434 371 Ehdr ehdr;
435 372 aux_entry_t elfargs[__KERN_NAUXV_IMPL];
436 373 char dl_name[MAXPATHLEN];
437 374 char pathbuf[MAXPATHLEN];
438 375 struct vattr vattr;
439 376 struct execenv exenv;
440 377 } *bigwad; /* kmem_alloc this behemoth so we don't blow stack */
441 378 Ehdr *ehdrp;
442 379 uint_t nshdrs, shstrndx, nphdrs;
443 380 size_t phdrsize;
444 381 char *dlnp;
445 382 char *pathbufp;
446 383 rlim64_t limit;
447 384 rlim64_t roundlimit;
448 385
449 386 ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
450 387
451 388 bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
452 389 ehdrp = &bigwad->ehdr;
453 390 dlnp = bigwad->dl_name;
454 391 pathbufp = bigwad->pathbuf;
455 392
456 393 /*
457 394 * Obtain ELF and program header information.
458 395 */
459 396 if ((error = getelfhead(vp, CRED(), ehdrp, &nshdrs, &shstrndx,
460 397 &nphdrs)) != 0 ||
461 398 (error = getelfphdr(vp, CRED(), ehdrp, nphdrs, &phdrbase,
462 399 &phdrsize)) != 0)
463 400 goto out;
464 401
465 402 /*
466 403 * Prevent executing an ELF file that has no entry point.
467 404 */
468 405 if (ehdrp->e_entry == 0) {
469 406 uprintf("%s: Bad entry point\n", exec_file);
470 407 goto bad;
471 408 }
472 409
473 410 /*
474 411 * Put data model that we're exec-ing to into the args passed to
475 412 * exec_args(), so it will know what it is copying to on new stack.
476 413 * Now that we know whether we are exec-ing a 32-bit or 64-bit
477 414 * executable, we can set execsz with the appropriate NCARGS.
478 415 */
479 416 #ifdef _LP64
480 417 if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) {
481 418 args->to_model = DATAMODEL_ILP32;
482 419 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
483 420 } else {
484 421 args->to_model = DATAMODEL_LP64;
485 422 if (!args->stk_prot_override) {
486 423 args->stk_prot &= ~PROT_EXEC;
487 424 }
488 425 #if defined(__x86)
489 426 args->dat_prot &= ~PROT_EXEC;
490 427 #endif
491 428 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1);
492 429 }
493 430 #else /* _LP64 */
494 431 args->to_model = DATAMODEL_ILP32;
495 432 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1);
496 433 #endif /* _LP64 */
497 434
498 435 /*
499 436 * We delay invoking the brand callback until we've figured out what
500 437 * kind of elf binary we're trying to run, 32-bit or 64-bit. We do this
501 438 * because now the brand library can just check args->to_model to see if
502 439 * the target is 32-bit or 64-bit without having do duplicate all the
503 440 * code above.
504 441 *
505 442 * We also give the brand a chance to indicate that based on the ELF
506 443 * OSABI of the target binary it should become unbranded and optionally
507 444 * indicate that it should be treated as existing in a specific prefix.
508 445 *
509 446 * Note that if a brand opts to go down this route it does not actually
510 447 * end up being debranded. In other words, future programs that exec
511 448 * will still be considered for branding unless this escape hatch is
512 449 * used. Consider the case of lx brand for example. If a user runs
513 450 * /native/usr/sbin/dtrace -c /bin/ls, the isaexec and normal executable
514 451 * of DTrace that's in /native will take this escape hatch and be run
515 452 * and interpreted using the normal system call table; however, the
516 453 * execution of a non-illumos binary in the form of /bin/ls will still
517 454 * be branded and be subject to all of the normal actions of the brand.
518 455 *
519 456 * The level checks associated with brand handling below are used to
520 457 * prevent a loop since the brand elfexec function typically comes back
521 458 * through this function. We must check <= here since the nested
522 459 * handling in the #! interpreter code will increment the level before
523 460 * calling gexec to run the final elfexec interpreter.
524 461 */
525 462 if ((level <= INTP_MAXDEPTH) && (*brand_action != EBA_NATIVE) &&
526 463 (PROC_IS_BRANDED(p)) && (BROP(p)->b_native_exec != NULL)) {
527 464 if (BROP(p)->b_native_exec(ehdrp->e_ident[EI_OSABI],
528 465 &args->brand_nroot) == B_TRUE) {
529 466 ASSERT(ehdrp->e_ident[EI_OSABI]);
530 467 *brand_action = EBA_NATIVE;
531 468 /* Add one for the trailing '/' in the path */
532 469 if (args->brand_nroot != NULL)
533 470 nsize = strlen(args->brand_nroot) + 1;
534 471 }
535 472 }
536 473
537 474 if ((level <= INTP_MAXDEPTH) &&
538 475 (*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
539 476 error = BROP(p)->b_elfexec(vp, uap, args,
540 477 idatap, level + 1, execsz, setid, exec_file, cred,
541 478 brand_action);
542 479 goto out;
543 480 }
544 481
545 482 /*
546 483 * Determine aux size now so that stack can be built
547 484 * in one shot (except actual copyout of aux image),
548 485 * determine any non-default stack protections,
549 486 * and still have this code be machine independent.
550 487 */
551 488 const uint_t hsize = ehdrp->e_phentsize;
552 489 phdrp = (Phdr *)phdrbase;
553 490 for (i = nphdrs; i > 0; i--) {
554 491 switch (phdrp->p_type) {
555 492 case PT_INTERP:
556 493 hasauxv = hasintp = 1;
557 494 break;
558 495 case PT_PHDR:
559 496 hasu = 1;
560 497 break;
561 498 case PT_SUNWSTACK:
562 499 args->stk_prot = PROT_USER;
563 500 if (phdrp->p_flags & PF_R)
564 501 args->stk_prot |= PROT_READ;
565 502 if (phdrp->p_flags & PF_W)
566 503 args->stk_prot |= PROT_WRITE;
567 504 if (phdrp->p_flags & PF_X)
568 505 args->stk_prot |= PROT_EXEC;
569 506 break;
570 507 case PT_LOAD:
571 508 dataphdrp = phdrp;
572 509 break;
573 510 case PT_SUNWCAP:
574 511 capphdr = phdrp;
575 512 break;
576 513 case PT_DYNAMIC:
577 514 dynamicphdr = phdrp;
578 515 break;
579 516 }
580 517 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
581 518 }
582 519
583 520 if (ehdrp->e_type != ET_EXEC) {
584 521 dataphdrp = NULL;
585 522 hasauxv = 1;
586 523 }
587 524
588 525 /* Copy BSS permissions to args->dat_prot */
589 526 if (dataphdrp != NULL) {
590 527 args->dat_prot = PROT_USER;
591 528 if (dataphdrp->p_flags & PF_R)
592 529 args->dat_prot |= PROT_READ;
593 530 if (dataphdrp->p_flags & PF_W)
594 531 args->dat_prot |= PROT_WRITE;
595 532 if (dataphdrp->p_flags & PF_X)
596 533 args->dat_prot |= PROT_EXEC;
597 534 }
598 535
599 536 /*
600 537 * If a auxvector will be required - reserve the space for
601 538 * it now. This may be increased by exec_args if there are
602 539 * ISA-specific types (included in __KERN_NAUXV_IMPL).
603 540 */
604 541 if (hasauxv) {
605 542 /*
606 543 * If a AUX vector is being built - the base AUX
607 544 * entries are:
608 545 *
609 546 * AT_BASE
610 547 * AT_FLAGS
611 548 * AT_PAGESZ
612 549 * AT_RANDOM (added in stk_copyout)
613 550 * AT_SUN_AUXFLAGS
614 551 * AT_SUN_HWCAP
615 552 * AT_SUN_HWCAP2
616 553 * AT_SUN_PLATFORM (added in stk_copyout)
617 554 * AT_SUN_EXECNAME (added in stk_copyout)
618 555 * AT_NULL
619 556 *
620 557 * total == 10
621 558 */
622 559 if (hasintp && hasu) {
623 560 /*
624 561 * Has PT_INTERP & PT_PHDR - the auxvectors that
625 562 * will be built are:
626 563 *
627 564 * AT_PHDR
628 565 * AT_PHENT
629 566 * AT_PHNUM
630 567 * AT_ENTRY
631 568 * AT_LDDATA
632 569 *
633 570 * total = 5
634 571 */
635 572 args->auxsize = (10 + 5) * sizeof (aux_entry_t);
636 573 } else if (hasintp) {
637 574 /*
638 575 * Has PT_INTERP but no PT_PHDR
639 576 *
640 577 * AT_EXECFD
641 578 * AT_LDDATA
642 579 *
643 580 * total = 2
644 581 */
645 582 args->auxsize = (10 + 2) * sizeof (aux_entry_t);
646 583 } else {
647 584 args->auxsize = 10 * sizeof (aux_entry_t);
648 585 }
649 586 } else {
650 587 args->auxsize = 0;
651 588 }
652 589
653 590 /*
654 591 * If this binary is using an emulator, we need to add an
655 592 * AT_SUN_EMULATOR aux entry.
656 593 */
657 594 if (args->emulator != NULL)
658 595 args->auxsize += sizeof (aux_entry_t);
659 596
660 597 /*
661 598 * If this is a native binary that's been given a modified interpreter
662 599 * root, inform it that the native system exists at that root.
663 600 */
664 601 if (args->brand_nroot != NULL) {
665 602 args->auxsize += sizeof (aux_entry_t);
666 603 }
667 604
668 605
669 606 /*
670 607 * On supported kernels (x86_64) make room in the auxv for the
671 608 * AT_SUN_COMMPAGE entry. This will go unpopulated on i86xpv systems
672 609 * which do not provide such functionality.
673 610 *
674 611 * Additionally cover the floating point information AT_SUN_FPSIZE and
675 612 * AT_SUN_FPTYPE.
676 613 */
677 614 #if defined(__amd64)
678 615 args->auxsize += 3 * sizeof (aux_entry_t);
679 616 #endif /* defined(__amd64) */
680 617
681 618 /*
682 619 * If we have user credentials, we'll supply the following entries:
683 620 * AT_SUN_UID
684 621 * AT_SUN_RUID
685 622 * AT_SUN_GID
686 623 * AT_SUN_RGID
687 624 */
688 625 if (cred != NULL) {
689 626 args->auxsize += 4 * sizeof (aux_entry_t);
690 627 }
691 628
692 629 if ((*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
693 630 branded = 1;
694 631 /*
695 632 * We will be adding 5 entries to the aux vectors. One for
696 633 * the the brandname and 4 for the brand specific aux vectors.
697 634 */
698 635 args->auxsize += 5 * sizeof (aux_entry_t);
699 636 }
700 637
701 638 /* If the binary has an explicit ASLR flag, it must be honoured */
702 639 if ((dynamicphdr != NULL) && (dynamicphdr->p_filesz > 0)) {
703 640 const size_t dynfilesz = dynamicphdr->p_filesz;
704 641 const size_t dynoffset = dynamicphdr->p_offset;
705 642 Dyn *dyn, *dp;
706 643
707 644 if (dynoffset > MAXOFFSET_T ||
708 645 dynfilesz > MAXOFFSET_T ||
709 646 dynoffset + dynfilesz > MAXOFFSET_T) {
710 647 uprintf("%s: cannot read full .dynamic section\n",
711 648 exec_file);
712 649 error = EINVAL;
713 650 goto out;
714 651 }
715 652
716 653 #define DYN_STRIDE 100
717 654 for (i = 0; i < dynfilesz; i += sizeof (*dyn) * DYN_STRIDE) {
718 655 const size_t remdyns = (dynfilesz - i) / sizeof (*dyn);
719 656 const size_t ndyns = MIN(DYN_STRIDE, remdyns);
720 657 const size_t dynsize = ndyns * sizeof (*dyn);
721 658
722 659 dyn = kmem_alloc(dynsize, KM_SLEEP);
723 660
724 661 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)dyn,
725 662 (ssize_t)dynsize, (offset_t)(dynoffset + i),
726 663 UIO_SYSSPACE, 0, (rlim64_t)0,
727 664 CRED(), NULL)) != 0) {
728 665 uprintf("%s: cannot read .dynamic section\n",
729 666 exec_file);
730 667 goto out;
731 668 }
732 669
733 670 for (dp = dyn; dp < (dyn + ndyns); dp++) {
734 671 if (dp->d_tag == DT_SUNW_ASLR) {
735 672 if ((error = handle_secflag_dt(p,
736 673 DT_SUNW_ASLR,
737 674 dp->d_un.d_val)) != 0) {
738 675 uprintf("%s: error setting "
739 676 "security-flag from "
740 677 "DT_SUNW_ASLR: %d\n",
741 678 exec_file, error);
742 679 goto out;
743 680 }
744 681 }
745 682 }
746 683
747 684 kmem_free(dyn, dynsize);
748 685 }
749 686 }
750 687
751 688 /* Hardware/Software capabilities */
752 689 if (capphdr != NULL &&
753 690 (capsize = capphdr->p_filesz) > 0 &&
754 691 capsize <= 16 * sizeof (*cap)) {
755 692 const uint_t ncaps = capsize / sizeof (*cap);
756 693 Cap *cp;
757 694
758 695 cap = kmem_alloc(capsize, KM_SLEEP);
759 696 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
760 697 (ssize_t)capsize, (offset_t)capphdr->p_offset,
761 698 UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), NULL)) != 0) {
762 699 uprintf("%s: Cannot read capabilities section\n",
763 700 exec_file);
764 701 goto out;
765 702 }
766 703 for (cp = cap; cp < cap + ncaps; cp++) {
767 704 if (cp->c_tag == CA_SUNW_SF_1 &&
768 705 (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
769 706 if (args->to_model == DATAMODEL_LP64)
770 707 args->addr32 = 1;
771 708 break;
772 709 }
773 710 }
774 711 }
775 712
776 713 aux = bigwad->elfargs;
777 714 /*
778 715 * Move args to the user's stack.
779 716 * This can fill in the AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM
780 717 * aux entries.
781 718 */
782 719 if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
783 720 if (error == -1) {
784 721 error = ENOEXEC;
785 722 goto bad;
786 723 }
787 724 goto out;
788 725 }
789 726 /* we're single threaded after this point */
790 727
791 728 /*
792 729 * If this is an ET_DYN executable (shared object),
793 730 * determine its memory size so that mapelfexec() can load it.
794 731 */
795 732 if (ehdrp->e_type == ET_DYN)
796 733 len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
797 734 else
798 735 len = 0;
799 736
800 737 dtrphdr = NULL;
801 738
802 739 error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &intphdr,
803 740 &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
804 741 len, execsz, &brksize);
805 742 /*
806 743 * Our uphdr has been dynamically allocated if (and only if) its
807 744 * program header flags are clear. To avoid leaks, this must be
808 745 * checked regardless of whether mapelfexec() emitted an error.
809 746 */
810 747 dynuphdr = (uphdr != NULL && uphdr->p_flags == 0);
811 748
812 749 if (error != 0) {
813 750 goto bad;
814 751 }
815 752
816 753 if (uphdr != NULL && intphdr == NULL)
817 754 goto bad;
818 755
819 756 if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
820 757 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
821 758 goto bad;
822 759 }
823 760
824 761 if (intphdr != NULL) {
825 762 size_t len;
826 763 uintptr_t lddata;
827 764 char *p;
828 765 struct vnode *nvp;
829 766
830 767 dlnsize = intphdr->p_filesz + nsize;
831 768
832 769 /*
833 770 * Make sure none of the component pieces of dlnsize result in
834 771 * an oversized or zeroed result.
835 772 */
836 773 if (intphdr->p_filesz > MAXPATHLEN || dlnsize > MAXPATHLEN ||
837 774 dlnsize == 0 || dlnsize < intphdr->p_filesz) {
838 775 goto bad;
839 776 }
840 777
841 778 if (nsize != 0) {
842 779 bcopy(args->brand_nroot, dlnp, nsize - 1);
843 780 dlnp[nsize - 1] = '/';
844 781 }
845 782
846 783 /*
847 784 * Read in "interpreter" pathname.
848 785 */
849 786 if ((error = vn_rdwr(UIO_READ, vp, dlnp + nsize,
850 787 (ssize_t)intphdr->p_filesz, (offset_t)intphdr->p_offset,
851 788 UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
852 789 uprintf("%s: Cannot obtain interpreter pathname\n",
853 790 exec_file);
854 791 goto bad;
855 792 }
856 793
857 794 if (resid != 0 || dlnp[dlnsize - 1] != '\0')
858 795 goto bad;
859 796
860 797 /*
861 798 * Search for '$ORIGIN' token in interpreter path.
862 799 * If found, expand it.
863 800 */
864 801 for (p = dlnp; p = strchr(p, '$'); ) {
865 802 uint_t len, curlen;
866 803 char *_ptr;
867 804
868 805 if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
869 806 continue;
870 807
871 808 /*
872 809 * We don't support $ORIGIN on setid programs to close
873 810 * a potential attack vector.
874 811 */
875 812 if ((setid & EXECSETID_SETID) != 0) {
876 813 error = ENOEXEC;
877 814 goto bad;
878 815 }
879 816
880 817 curlen = 0;
881 818 len = p - dlnp - 1;
882 819 if (len) {
883 820 bcopy(dlnp, pathbufp, len);
884 821 curlen += len;
885 822 }
886 823 if (_ptr = strrchr(args->pathname, '/')) {
887 824 len = _ptr - args->pathname;
888 825 if ((curlen + len) > MAXPATHLEN)
889 826 break;
890 827
891 828 bcopy(args->pathname, &pathbufp[curlen], len);
892 829 curlen += len;
893 830 } else {
894 831 /*
895 832 * executable is a basename found in the
896 833 * current directory. So - just substitue
897 834 * '.' for ORIGIN.
898 835 */
899 836 pathbufp[curlen] = '.';
900 837 curlen++;
901 838 }
902 839 p += ORIGIN_STR_SIZE;
903 840 len = strlen(p);
904 841
905 842 if ((curlen + len) > MAXPATHLEN)
906 843 break;
907 844 bcopy(p, &pathbufp[curlen], len);
908 845 curlen += len;
909 846 pathbufp[curlen++] = '\0';
910 847 bcopy(pathbufp, dlnp, curlen);
911 848 }
912 849
913 850 /*
914 851 * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
915 852 * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
916 853 * Just in case /usr is not mounted, change it now.
917 854 */
918 855 if (strcmp(dlnp, USR_LIB_RTLD) == 0)
919 856 dlnp += 4;
920 857 error = lookupname(dlnp, UIO_SYSSPACE, FOLLOW, NULLVPP, &nvp);
921 858 if (error && dlnp != bigwad->dl_name) {
922 859 /* new kernel, old user-level */
923 860 error = lookupname(dlnp -= 4, UIO_SYSSPACE, FOLLOW,
924 861 NULLVPP, &nvp);
925 862 }
926 863 if (error) {
927 864 uprintf("%s: Cannot find %s\n", exec_file, dlnp);
928 865 goto bad;
929 866 }
930 867
931 868 /*
932 869 * Setup the "aux" vector.
933 870 */
934 871 if (uphdr) {
935 872 if (ehdrp->e_type == ET_DYN) {
936 873 /* don't use the first page */
937 874 bigwad->exenv.ex_brkbase = (caddr_t)PAGESIZE;
938 875 bigwad->exenv.ex_bssbase = (caddr_t)PAGESIZE;
939 876 } else {
940 877 bigwad->exenv.ex_bssbase = bssbase;
941 878 bigwad->exenv.ex_brkbase = brkbase;
942 879 }
943 880 bigwad->exenv.ex_brksize = brksize;
944 881 bigwad->exenv.ex_magic = elfmagic;
945 882 bigwad->exenv.ex_vp = vp;
946 883 setexecenv(&bigwad->exenv);
947 884
948 885 ADDAUX(aux, AT_PHDR, uphdr->p_vaddr + voffset)
949 886 ADDAUX(aux, AT_PHENT, ehdrp->e_phentsize)
950 887 ADDAUX(aux, AT_PHNUM, nphdrs)
951 888 ADDAUX(aux, AT_ENTRY, ehdrp->e_entry + voffset)
952 889 } else {
953 890 if ((error = execopen(&vp, &fd)) != 0) {
954 891 VN_RELE(nvp);
955 892 goto bad;
956 893 }
957 894
958 895 ADDAUX(aux, AT_EXECFD, fd)
959 896 }
960 897
961 898 if ((error = execpermissions(nvp, &bigwad->vattr, args)) != 0) {
962 899 VN_RELE(nvp);
963 900 uprintf("%s: Cannot execute %s\n", exec_file, dlnp);
964 901 goto bad;
965 902 }
966 903
967 904 /*
968 905 * Now obtain the ELF header along with the entire program
969 906 * header contained in "nvp".
970 907 */
971 908 kmem_free(phdrbase, phdrsize);
972 909 phdrbase = NULL;
973 910 if ((error = getelfhead(nvp, CRED(), ehdrp, &nshdrs,
974 911 &shstrndx, &nphdrs)) != 0 ||
975 912 (error = getelfphdr(nvp, CRED(), ehdrp, nphdrs, &phdrbase,
976 913 &phdrsize)) != 0) {
977 914 VN_RELE(nvp);
978 915 uprintf("%s: Cannot read %s\n", exec_file, dlnp);
979 916 goto bad;
980 917 }
981 918
982 919 /*
983 920 * Determine memory size of the "interpreter's" loadable
984 921 * sections. This size is then used to obtain the virtual
985 922 * address of a hole, in the user's address space, large
986 923 * enough to map the "interpreter".
987 924 */
988 925 if ((len = elfsize(ehdrp, nphdrs, phdrbase, &lddata)) == 0) {
989 926 VN_RELE(nvp);
990 927 uprintf("%s: Nothing to load in %s\n", exec_file, dlnp);
991 928 goto bad;
992 929 }
993 930
994 931 dtrphdr = NULL;
995 932
996 933 error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, NULL, &junk,
997 934 &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
998 935 execsz, NULL);
999 936
1000 937 if (error || junk != NULL) {
1001 938 VN_RELE(nvp);
1002 939 uprintf("%s: Cannot map %s\n", exec_file, dlnp);
1003 940 goto bad;
1004 941 }
1005 942
1006 943 /*
1007 944 * We use the DTrace program header to initialize the
1008 945 * architecture-specific user per-LWP location. The dtrace
1009 946 * fasttrap provider requires ready access to per-LWP scratch
1010 947 * space. We assume that there is only one such program header
1011 948 * in the interpreter.
1012 949 */
1013 950 if (dtrphdr != NULL &&
1014 951 dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
1015 952 VN_RELE(nvp);
1016 953 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, dlnp);
1017 954 goto bad;
1018 955 }
1019 956
1020 957 VN_RELE(nvp);
1021 958 ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata)
1022 959 }
1023 960
1024 961 if (hasauxv) {
1025 962 int auxf = AF_SUN_HWCAPVERIFY;
1026 963 #if defined(__amd64)
1027 964 size_t fpsize;
1028 965 int fptype;
1029 966 #endif /* defined(__amd64) */
1030 967
1031 968 /*
1032 969 * Note: AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM were
1033 970 * filled in via exec_args()
1034 971 */
1035 972 ADDAUX(aux, AT_BASE, voffset)
1036 973 ADDAUX(aux, AT_FLAGS, at_flags)
1037 974 ADDAUX(aux, AT_PAGESZ, PAGESIZE)
1038 975 /*
1039 976 * Linker flags. (security)
1040 977 * p_flag not yet set at this time.
1041 978 * We rely on gexec() to provide us with the information.
1042 979 * If the application is set-uid but this is not reflected
1043 980 * in a mismatch between real/effective uids/gids, then
1044 981 * don't treat this as a set-uid exec. So we care about
1045 982 * the EXECSETID_UGIDS flag but not the ...SETID flag.
1046 983 */
1047 984 if ((setid &= ~EXECSETID_SETID) != 0)
1048 985 auxf |= AF_SUN_SETUGID;
1049 986
1050 987 /*
1051 988 * If we're running a native process from within a branded
1052 989 * zone under pfexec then we clear the AF_SUN_SETUGID flag so
1053 990 * that the native ld.so.1 is able to link with the native
1054 991 * libraries instead of using the brand libraries that are
1055 992 * installed in the zone. We only do this for processes
1056 993 * which we trust because we see they are already running
1057 994 * under pfexec (where uid != euid). This prevents a
1058 995 * malicious user within the zone from crafting a wrapper to
1059 996 * run native suid commands with unsecure libraries interposed.
1060 997 */
1061 998 if ((*brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
1062 999 (setid &= ~EXECSETID_SETID) != 0))
1063 1000 auxf &= ~AF_SUN_SETUGID;
1064 1001
1065 1002 /*
1066 1003 * Record the user addr of the auxflags aux vector entry
1067 1004 * since brands may optionally want to manipulate this field.
1068 1005 */
1069 1006 args->auxp_auxflags =
1070 1007 (char *)((char *)args->stackend +
1071 1008 ((char *)&aux->a_type -
1072 1009 (char *)bigwad->elfargs));
1073 1010 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
1074 1011
1075 1012 /*
1076 1013 * Record information about the real and effective user and
1077 1014 * group IDs.
1078 1015 */
1079 1016 if (cred != NULL) {
1080 1017 ADDAUX(aux, AT_SUN_UID, crgetuid(cred));
1081 1018 ADDAUX(aux, AT_SUN_RUID, crgetruid(cred));
1082 1019 ADDAUX(aux, AT_SUN_GID, crgetgid(cred));
1083 1020 ADDAUX(aux, AT_SUN_RGID, crgetrgid(cred));
1084 1021 }
1085 1022
1086 1023 /*
1087 1024 * Hardware capability flag word (performance hints)
1088 1025 * Used for choosing faster library routines.
1089 1026 * (Potentially different between 32-bit and 64-bit ABIs)
1090 1027 */
1091 1028 #if defined(_LP64)
1092 1029 if (args->to_model == DATAMODEL_NATIVE) {
1093 1030 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
1094 1031 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
1095 1032 } else {
1096 1033 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
1097 1034 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
1098 1035 }
1099 1036 #else
1100 1037 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
1101 1038 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
1102 1039 #endif
1103 1040 if (branded) {
1104 1041 /*
1105 1042 * Reserve space for the brand-private aux vectors,
1106 1043 * and record the user addr of that space.
1107 1044 */
1108 1045 args->auxp_brand =
1109 1046 (char *)((char *)args->stackend +
1110 1047 ((char *)&aux->a_type -
1111 1048 (char *)bigwad->elfargs));
1112 1049 ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
1113 1050 ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
1114 1051 ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
1115 1052 ADDAUX(aux, AT_SUN_BRAND_AUX4, 0)
1116 1053 }
1117 1054
1118 1055 /*
1119 1056 * Add the comm page auxv entry, mapping it in if needed. Also
1120 1057 * take care of the FPU entries.
1121 1058 */
1122 1059 #if defined(__amd64)
1123 1060 if (args->commpage != (uintptr_t)NULL ||
1124 1061 (args->commpage = (uintptr_t)comm_page_mapin()) !=
1125 1062 (uintptr_t)NULL) {
1126 1063 ADDAUX(aux, AT_SUN_COMMPAGE, args->commpage)
1127 1064 } else {
1128 1065 /*
1129 1066 * If the comm page cannot be mapped, pad out the auxv
1130 1067 * to satisfy later size checks.
1131 1068 */
1132 1069 ADDAUX(aux, AT_NULL, 0)
1133 1070 }
1134 1071
1135 1072 fptype = AT_386_FPINFO_NONE;
1136 1073 fpu_auxv_info(&fptype, &fpsize);
1137 1074 if (fptype != AT_386_FPINFO_NONE) {
1138 1075 ADDAUX(aux, AT_SUN_FPTYPE, fptype)
1139 1076 ADDAUX(aux, AT_SUN_FPSIZE, fpsize)
1140 1077 } else {
1141 1078 ADDAUX(aux, AT_NULL, 0)
1142 1079 ADDAUX(aux, AT_NULL, 0)
1143 1080 }
1144 1081 #endif /* defined(__amd64) */
1145 1082
1146 1083 ADDAUX(aux, AT_NULL, 0)
1147 1084 postfixsize = (uintptr_t)aux - (uintptr_t)bigwad->elfargs;
1148 1085
1149 1086 /*
1150 1087 * We make assumptions above when we determine how many aux
1151 1088 * vector entries we will be adding. However, if we have an
1152 1089 * invalid elf file, it is possible that mapelfexec might
1153 1090 * behave differently (but not return an error), in which case
1154 1091 * the number of aux entries we actually add will be different.
1155 1092 * We detect that now and error out.
1156 1093 */
1157 1094 if (postfixsize != args->auxsize) {
1158 1095 DTRACE_PROBE2(elfexec_badaux, size_t, postfixsize,
1159 1096 size_t, args->auxsize);
1160 1097 goto bad;
1161 1098 }
1162 1099 ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
1163 1100 }
1164 1101
1165 1102 /*
1166 1103 * For the 64-bit kernel, the limit is big enough that rounding it up
1167 1104 * to a page can overflow the 64-bit limit, so we check for btopr()
1168 1105 * overflowing here by comparing it with the unrounded limit in pages.
1169 1106 * If it hasn't overflowed, compare the exec size with the rounded up
1170 1107 * limit in pages. Otherwise, just compare with the unrounded limit.
1171 1108 */
1172 1109 limit = btop(p->p_vmem_ctl);
1173 1110 roundlimit = btopr(p->p_vmem_ctl);
1174 1111 if ((roundlimit > limit && *execsz > roundlimit) ||
1175 1112 (roundlimit < limit && *execsz > limit)) {
1176 1113 mutex_enter(&p->p_lock);
1177 1114 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
1178 1115 RCA_SAFE);
1179 1116 mutex_exit(&p->p_lock);
1180 1117 error = ENOMEM;
1181 1118 goto bad;
1182 1119 }
1183 1120
1184 1121 bzero(up->u_auxv, sizeof (up->u_auxv));
1185 1122 up->u_commpagep = args->commpage;
1186 1123 if (postfixsize) {
1187 1124 size_t num_auxv;
1188 1125
1189 1126 /*
1190 1127 * Copy the aux vector to the user stack.
1191 1128 */
1192 1129 error = execpoststack(args, bigwad->elfargs, postfixsize);
1193 1130 if (error)
1194 1131 goto bad;
1195 1132
1196 1133 /*
1197 1134 * Copy auxv to the process's user structure for use by /proc.
1198 1135 * If this is a branded process, the brand's exec routine will
1199 1136 * copy it's private entries to the user structure later. It
1200 1137 * relies on the fact that the blank entries are at the end.
1201 1138 */
1202 1139 num_auxv = postfixsize / sizeof (aux_entry_t);
1203 1140 ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
1204 1141 aux = bigwad->elfargs;
1205 1142 for (i = 0; i < num_auxv; i++) {
1206 1143 up->u_auxv[i].a_type = aux[i].a_type;
1207 1144 up->u_auxv[i].a_un.a_val = (aux_val_t)aux[i].a_un.a_val;
1208 1145 }
1209 1146 }
1210 1147
1211 1148 /*
1212 1149 * Pass back the starting address so we can set the program counter.
1213 1150 */
1214 1151 args->entry = (uintptr_t)(ehdrp->e_entry + voffset);
1215 1152
1216 1153 if (!uphdr) {
1217 1154 if (ehdrp->e_type == ET_DYN) {
1218 1155 /*
1219 1156 * If we are executing a shared library which doesn't
1220 1157 * have a interpreter (probably ld.so.1) then
1221 1158 * we don't set the brkbase now. Instead we
1222 1159 * delay it's setting until the first call
1223 1160 * via grow.c::brk(). This permits ld.so.1 to
1224 1161 * initialize brkbase to the tail of the executable it
1225 1162 * loads (which is where it needs to be).
1226 1163 */
1227 1164 bigwad->exenv.ex_brkbase = (caddr_t)0;
1228 1165 bigwad->exenv.ex_bssbase = (caddr_t)0;
1229 1166 bigwad->exenv.ex_brksize = 0;
1230 1167 } else {
1231 1168 bigwad->exenv.ex_brkbase = brkbase;
1232 1169 bigwad->exenv.ex_bssbase = bssbase;
1233 1170 bigwad->exenv.ex_brksize = brksize;
1234 1171 }
1235 1172 bigwad->exenv.ex_magic = elfmagic;
1236 1173 bigwad->exenv.ex_vp = vp;
1237 1174 setexecenv(&bigwad->exenv);
1238 1175 }
1239 1176
1240 1177 ASSERT(error == 0);
1241 1178 goto out;
1242 1179
1243 1180 bad:
1244 1181 if (fd != -1) /* did we open the a.out yet */
1245 1182 (void) execclose(fd);
1246 1183
1247 1184 psignal(p, SIGKILL);
1248 1185
1249 1186 if (error == 0)
1250 1187 error = ENOEXEC;
1251 1188 out:
1252 1189 if (dynuphdr)
1253 1190 kmem_free(uphdr, sizeof (Phdr));
1254 1191 if (phdrbase != NULL)
1255 1192 kmem_free(phdrbase, phdrsize);
1256 1193 if (cap != NULL)
1257 1194 kmem_free(cap, capsize);
1258 1195 kmem_free(bigwad, sizeof (struct bigwad));
1259 1196 return (error);
1260 1197 }
1261 1198
1262 1199 /*
1263 1200 * Compute the memory size requirement for the ELF file.
1264 1201 */
1265 1202 static size_t
1266 1203 elfsize(const Ehdr *ehdrp, uint_t nphdrs, const caddr_t phdrbase,
1267 1204 uintptr_t *lddata)
1268 1205 {
1269 1206 const Phdr *phdrp = (Phdr *)phdrbase;
1270 1207 const uint_t hsize = ehdrp->e_phentsize;
1271 1208 boolean_t dfirst = B_TRUE;
1272 1209 uintptr_t loaddr = UINTPTR_MAX;
1273 1210 uintptr_t hiaddr = 0;
1274 1211 uint_t i;
1275 1212
1276 1213 for (i = nphdrs; i > 0; i--) {
1277 1214 if (phdrp->p_type == PT_LOAD) {
1278 1215 const uintptr_t lo = phdrp->p_vaddr;
1279 1216 const uintptr_t hi = lo + phdrp->p_memsz;
1280 1217
1281 1218 loaddr = MIN(lo, loaddr);
1282 1219 hiaddr = MAX(hi, hiaddr);
1283 1220
1284 1221 /*
1285 1222 * save the address of the first data segment
1286 1223 * of a object - used for the AT_SUNW_LDDATA
1287 1224 * aux entry.
1288 1225 */
1289 1226 if ((lddata != NULL) && dfirst &&
1290 1227 (phdrp->p_flags & PF_W)) {
1291 1228 *lddata = lo;
1292 1229 dfirst = B_FALSE;
1293 1230 }
1294 1231 }
1295 1232 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
1296 1233 }
1297 1234
1298 1235 if (hiaddr <= loaddr) {
1299 1236 /* No non-zero PT_LOAD segment found */
1300 1237 return (0);
1301 1238 }
1302 1239
1303 1240 return (roundup(hiaddr - (loaddr & PAGEMASK), PAGESIZE));
1304 1241 }
1305 1242
/*
 * Read in the ELF header and program header table.
 * SUSV3 requires:
 *	ENOEXEC	File format is not recognized
 *	EINVAL	Format recognized but execution not supported
 *
 * On success, *nshdrs, *shstrndx and *nphdrs hold the section header
 * count, the section string table index and the program header count,
 * with the extended-numbering sentinel values (0, SHN_XINDEX, PN_XNUM)
 * resolved by consulting section header zero.
 */
static int
getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, uint_t *nshdrs,
    uint_t *shstrndx, uint_t *nphdrs)
{
	int error;
	ssize_t resid;

	/*
	 * We got here by the first two bytes in ident,
	 * now read the entire ELF header.
	 */
	if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr, sizeof (Ehdr),
	    (offset_t)0, UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid)) != 0) {
		return (error);
	}

	/*
	 * Since a separate version is compiled for handling 32-bit and
	 * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
	 * doesn't need to be able to deal with 32-bit ELF files.
	 *
	 * A non-zero resid means the file was too short to contain a
	 * complete Ehdr.
	 */
	if (resid != 0 ||
	    ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
	    ehdr->e_ident[EI_MAG3] != ELFMAG3) {
		return (ENOEXEC);
	}

	/* Accept only executables/shared objects of this kernel's class. */
	if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ||
#if defined(_ILP32) || defined(_ELF32_COMPAT)
	    ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
#else
	    ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
#endif
	    !elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine,
	    ehdr->e_flags)) {
		return (EINVAL);
	}

	*nshdrs = ehdr->e_shnum;
	*shstrndx = ehdr->e_shstrndx;
	*nphdrs = ehdr->e_phnum;

	/*
	 * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
	 * to read in the section header at index zero to access the true
	 * values for those fields.
	 */
	if ((*nshdrs == 0 && ehdr->e_shoff != 0) ||
	    *shstrndx == SHN_XINDEX || *nphdrs == PN_XNUM) {
		Shdr shdr;

		/* Extended numbering requires a section header table. */
		if (ehdr->e_shoff == 0)
			return (EINVAL);

		if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr,
		    sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0,
		    (rlim64_t)0, credp, NULL)) != 0)
			return (error);

		if (*nshdrs == 0)
			*nshdrs = shdr.sh_size;
		if (*shstrndx == SHN_XINDEX)
			*shstrndx = shdr.sh_link;
		if (*nphdrs == PN_XNUM && shdr.sh_info != 0)
			*nphdrs = shdr.sh_info;
	}

	return (0);
}
1381 1318
1382 1319 /*
1383 1320 * We use members through p_flags on 32-bit files and p_memsz on 64-bit files,
1384 1321 * so e_phentsize must be at least large enough to include those members.
1385 1322 */
1386 1323 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1387 1324 #define MINPHENTSZ (offsetof(Phdr, p_flags) + \
1388 1325 sizeof (((Phdr *)NULL)->p_flags))
1389 1326 #else
1390 1327 #define MINPHENTSZ (offsetof(Phdr, p_memsz) + \
1391 1328 sizeof (((Phdr *)NULL)->p_memsz))
1392 1329 #endif
1393 1330
1394 1331 static int
1395 1332 getelfphdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, uint_t nphdrs,
1396 1333 caddr_t *phbasep, size_t *phsizep)
1397 1334 {
1398 1335 int err;
1399 1336
1400 1337 /*
1401 1338 * Ensure that e_phentsize is large enough for required fields to be
1402 1339 * accessible and will maintain 8-byte alignment.
1403 1340 */
1404 1341 if (ehdr->e_phentsize < MINPHENTSZ || (ehdr->e_phentsize & 3))
1405 1342 return (EINVAL);
1406 1343
1407 1344 *phsizep = nphdrs * ehdr->e_phentsize;
1408 1345
1409 1346 if (*phsizep > sizeof (Phdr) * elf_nphdr_max) {
1410 1347 if ((*phbasep = kmem_alloc(*phsizep, KM_NOSLEEP)) == NULL)
1411 1348 return (ENOMEM);
1412 1349 } else {
1413 1350 *phbasep = kmem_alloc(*phsizep, KM_SLEEP);
1414 1351 }
1415 1352
1416 1353 if ((err = vn_rdwr(UIO_READ, vp, *phbasep, (ssize_t)*phsizep,
1417 1354 (offset_t)ehdr->e_phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1418 1355 credp, NULL)) != 0) {
1419 1356 kmem_free(*phbasep, *phsizep);
1420 1357 *phbasep = NULL;
1421 1358 return (err);
1422 1359 }
1423 1360
1424 1361 return (0);
1425 1362 }
1426 1363
/* We access members up through sh_entsize; e_shentsize must cover them. */
#define	MINSHDRSZ	(offsetof(Shdr, sh_entsize) + \
			sizeof (((Shdr *)NULL)->sh_entsize))

/*
 * Read the section header table and the section name string table from
 * "vp".  On success the caller owns both allocations -- *shbasep (size
 * *shsizep) and *shstrbasep (size *shstrsizep) -- and must kmem_free()
 * them.  On failure, anything allocated here is freed before returning.
 */
static int
getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, uint_t nshdrs,
    uint_t shstrndx, caddr_t *shbasep, size_t *shsizep, char **shstrbasep,
    size_t *shstrsizep)
{
	int err;
	Shdr *shdr;

	/*
	 * Since we're going to be using e_shentsize to iterate down the
	 * array of section headers, it must be 8-byte aligned or else
	 * we might cause a misaligned access. We use all members through
	 * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
	 * must be at least large enough to include that member. The index
	 * of the string table section must also be valid.
	 *
	 * NOTE(review): the check below actually enforces a multiple of 4
	 * ((e_shentsize & 3) == 0), not 8 as the text above says -- verify
	 * which is intended.
	 */
	if (ehdr->e_shentsize < MINSHDRSZ || (ehdr->e_shentsize & 3) ||
	    nshdrs == 0 || shstrndx >= nshdrs)
		return (EINVAL);

	*shsizep = nshdrs * ehdr->e_shentsize;

	/* Unusually large tables get a non-sleeping allocation attempt. */
	if (*shsizep > sizeof (Shdr) * elf_nshdr_max) {
		if ((*shbasep = kmem_alloc(*shsizep, KM_NOSLEEP)) == NULL)
			return (ENOMEM);
	} else {
		*shbasep = kmem_alloc(*shsizep, KM_SLEEP);
	}

	if ((err = vn_rdwr(UIO_READ, vp, *shbasep, (ssize_t)*shsizep,
	    (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, (rlim64_t)0,
	    credp, NULL)) != 0) {
		kmem_free(*shbasep, *shsizep);
		return (err);
	}

	/*
	 * Grab the section string table. Walking through the shdrs is
	 * pointless if their names cannot be interrogated.
	 */
	shdr = (Shdr *)(*shbasep + shstrndx * ehdr->e_shentsize);
	if ((*shstrsizep = shdr->sh_size) == 0) {
		kmem_free(*shbasep, *shsizep);
		return (EINVAL);
	}

	if (*shstrsizep > elf_shstrtab_max) {
		if ((*shstrbasep = kmem_alloc(*shstrsizep,
		    KM_NOSLEEP)) == NULL) {
			kmem_free(*shbasep, *shsizep);
			return (ENOMEM);
		}
	} else {
		*shstrbasep = kmem_alloc(*shstrsizep, KM_SLEEP);
	}

	if ((err = vn_rdwr(UIO_READ, vp, *shstrbasep, (ssize_t)*shstrsizep,
	    (offset_t)shdr->sh_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
	    credp, NULL)) != 0) {
		kmem_free(*shbasep, *shsizep);
		kmem_free(*shstrbasep, *shstrsizep);
		return (err);
	}

	/*
	 * Make sure the strtab is null-terminated to make sure we
	 * don't run off the end of the table.
	 */
	(*shstrbasep)[*shstrsizep - 1] = '\0';

	return (0);
}
1502 1439
1503 1440
1504 1441 int
1505 1442 elfreadhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, uint_t *nphdrs,
1506 1443 caddr_t *phbasep, size_t *phsizep)
1507 1444 {
1508 1445 int error;
1509 1446 uint_t nshdrs, shstrndx;
1510 1447
1511 1448 if ((error = getelfhead(vp, credp, ehdrp, &nshdrs, &shstrndx,
1512 1449 nphdrs)) != 0 ||
1513 1450 (error = getelfphdr(vp, credp, ehdrp, *nphdrs, phbasep,
1514 1451 phsizep)) != 0) {
1515 1452 return (error);
1516 1453 }
1517 1454 return (0);
1518 1455 }
1519 1456
1520 1457
/*
 * Map the program segments described by the ELF program header table into
 * the current address space, and report the locations of the special
 * program headers to the caller.
 *
 *	vp		vnode of the object being mapped
 *	ehdr, nphdrs, phdrbase
 *			its ELF header and program header table
 *	uphdr		if non-NULL, set to the PT_PHDR header; when the
 *			object lacks one, a substitute is constructed with
 *			p_flags == 0 to mark it artificial (caller must
 *			kmem_free() it)
 *	intphdr		set to the PT_INTERP header, if present
 *	stphdr		set to the PT_SHLIB header, if present
 *	dtphdr		if non-NULL, set to the PT_SUNWDTRACE header
 *	dataphdrp	the data segment's header; drives bss/brk and
 *			large-page heap bookkeeping
 *	bssbase, brkbase, brksize
 *			updated to describe bss and brk placement
 *	voffset		set to the relocation bias applied to an ET_DYN
 *			object (0 for ET_EXEC)
 *	minaddrp	if non-NULL, set to the lowest mapped address
 *	len		total mapping footprint, from elfsize()
 *	execsz		incremented by the mapped size, in pages
 */
static int
mapelfexec(
	vnode_t *vp,
	Ehdr *ehdr,
	uint_t nphdrs,
	caddr_t phdrbase,
	Phdr **uphdr,
	Phdr **intphdr,
	Phdr **stphdr,
	Phdr **dtphdr,
	Phdr *dataphdrp,
	caddr_t *bssbase,
	caddr_t *brkbase,
	intptr_t *voffset,
	uintptr_t *minaddrp,
	size_t len,
	size_t *execsz,
	size_t *brksize)
{
	Phdr *phdr;
	int error, page, prot, lastprot = 0;
	caddr_t addr = NULL;
	caddr_t minaddr = (caddr_t)UINTPTR_MAX;
	uint_t i;
	size_t zfodsz, memsz;
	boolean_t ptload = B_FALSE;
	off_t offset;
	const uint_t hsize = ehdr->e_phentsize;
	uintptr_t lastaddr = 0;
	extern int use_brk_lpg;

	if (ehdr->e_type == ET_DYN) {
		caddr_t vaddr;
		secflagset_t flags = 0;
		/*
		 * Obtain the virtual address of a hole in the
		 * address space to map the "interpreter".
		 */
		if (secflag_enabled(curproc, PROC_SEC_ASLR))
			flags |= _MAP_RANDOMIZE;

		map_addr(&addr, len, (offset_t)0, 1, flags);
		if (addr == NULL)
			return (ENOMEM);

		/*
		 * Despite the fact that mmapobj(2) refuses to load them, we
		 * need to support executing ET_DYN objects that have a
		 * non-NULL p_vaddr.  When found in the wild, these objects
		 * are likely to be due to an old (and largely obviated) Linux
		 * facility, prelink(8), that rewrites shared objects to
		 * prefer specific (disjoint) virtual address ranges.  (Yes,
		 * this is putatively for performance -- and yes, it has
		 * limited applicability, many edge conditions and grisly
		 * failure modes; even for Linux, it's insane.)  As ELF
		 * mandates that the PT_LOAD segments be in p_vaddr order, we
		 * find the lowest p_vaddr by finding the first PT_LOAD
		 * segment.
		 */
		phdr = (Phdr *)phdrbase;
		for (i = nphdrs; i > 0; i--) {
			if (phdr->p_type == PT_LOAD) {
				addr = (caddr_t)(uintptr_t)phdr->p_vaddr;
				break;
			}
			phdr = (Phdr *)((caddr_t)phdr + hsize);
		}

		/*
		 * We have a non-zero p_vaddr in the first PT_LOAD segment --
		 * presumably because we're directly executing a prelink(8)'d
		 * ld-linux.so.  While we could correctly execute such an
		 * object without locating it at its desired p_vaddr (it is,
		 * after all, still relocatable), our inner antiquarian
		 * derives a perverse pleasure in accommodating the steampunk
		 * prelink(8) contraption -- goggles on!
		 */
		if ((vaddr = addr) != NULL) {
			if (as_gap(curproc->p_as, len, &addr, &len,
			    AH_LO, NULL) == -1 || addr != vaddr) {
				addr = NULL;
			}
		}

		if (addr == NULL) {
			/*
			 * We either have a NULL p_vaddr (the common case, by
			 * many orders of magnitude) or we have a non-NULL
			 * p_vaddr and we were unable to obtain the specified
			 * VA range (presumably because it's an illegal
			 * address).  Either way, obtain an address in which
			 * to map the interpreter.
			 */
			map_addr(&addr, len, (offset_t)0, 1, 0);
			if (addr == NULL)
				return (ENOMEM);
		}

		/*
		 * Our voffset is the difference between where we landed and
		 * where we wanted to be.
		 */
		*voffset = (uintptr_t)addr - (uintptr_t)vaddr;
	} else {
		*voffset = 0;
	}

	/*
	 * Walk the program header table, mapping PT_LOAD segments and
	 * recording the special program headers for the caller.
	 */
	phdr = (Phdr *)phdrbase;
	for (i = nphdrs; i > 0; i--) {
		switch (phdr->p_type) {
		case PT_LOAD:
			ptload = B_TRUE;
			prot = PROT_USER;
			if (phdr->p_flags & PF_R)
				prot |= PROT_READ;
			if (phdr->p_flags & PF_W)
				prot |= PROT_WRITE;
			if (phdr->p_flags & PF_X)
				prot |= PROT_EXEC;

			addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);

			if ((*intphdr != NULL) && uphdr != NULL &&
			    (*uphdr == NULL)) {
				/*
				 * The PT_PHDR program header is, strictly
				 * speaking, optional. If we find that this
				 * is missing, we will determine the location
				 * of the program headers based on the address
				 * of the lowest PT_LOAD segment (namely, this
				 * one): we subtract the p_offset to get to
				 * the ELF header and then add back the program
				 * header offset to get to the program headers.
				 * We then cons up a Phdr that corresponds to
				 * the (missing) PT_PHDR, setting the flags
				 * to 0 to denote that this is artificial and
				 * should (must) be freed by the caller.
				 */
				Phdr *cons;

				cons = kmem_zalloc(sizeof (Phdr), KM_SLEEP);

				cons->p_flags = 0;
				cons->p_type = PT_PHDR;
				cons->p_vaddr = ((uintptr_t)addr -
				    phdr->p_offset) + ehdr->e_phoff;

				*uphdr = cons;
			}

			/*
			 * The ELF spec dictates that p_filesz may not be
			 * larger than p_memsz in PT_LOAD segments.
			 */
			if (phdr->p_filesz > phdr->p_memsz) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Keep track of the segment with the lowest starting
			 * address.
			 */
			if (addr < minaddr)
				minaddr = addr;

			/*
			 * Segments need not correspond to page boundaries:
			 * they are permitted to share a page.  If two PT_LOAD
			 * segments share the same page, and the permissions
			 * of the segments differ, the behavior is historically
			 * that the permissions of the latter segment are used
			 * for the page that the two segments share.  This is
			 * also historically a non-issue:  binaries generated
			 * by most anything will make sure that two PT_LOAD
			 * segments with differing permissions don't actually
			 * share any pages.  However, there exist some crazy
			 * things out there (including at least an obscure
			 * Portuguese teaching language called G-Portugol) that
			 * actually do the wrong thing and expect it to work:
			 * they have a segment with execute permission share
			 * a page with a subsequent segment that does not
			 * have execute permissions and expect the resulting
			 * shared page to in fact be executable.  To accommodate
			 * such broken link editors, we take advantage of a
			 * latitude explicitly granted to the loader:  it is
			 * permitted to make _any_ PT_LOAD segment executable
			 * (provided that it is readable or writable).  If we
			 * see that we're sharing a page and that the previous
			 * page was executable, we will add execute permissions
			 * to our segment.
			 */
			if (btop(lastaddr) == btop((uintptr_t)addr) &&
			    (phdr->p_flags & (PF_R | PF_W)) &&
			    (lastprot & PROT_EXEC)) {
				prot |= PROT_EXEC;
			}

			lastaddr = (uintptr_t)addr + phdr->p_filesz;
			lastprot = prot;

			/* Bytes past p_filesz up to p_memsz are zero-fill. */
			zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;

			/*
			 * We can map directly from the file only when the
			 * file offset and the target address are congruent
			 * modulo PAGESIZE and the vnode is mappable.
			 */
			offset = phdr->p_offset;
			if (((uintptr_t)offset & PAGEOFFSET) ==
			    ((uintptr_t)addr & PAGEOFFSET) &&
			    (!(vp->v_flag & VNOMAP))) {
				page = 1;
			} else {
				page = 0;
			}

			/*
			 * Set the heap pagesize for OOB when the bss size
			 * is known and use_brk_lpg is not 0.
			 */
			if (brksize != NULL && use_brk_lpg &&
			    zfodsz != 0 && phdr == dataphdrp &&
			    (prot & PROT_WRITE)) {
				const size_t tlen = P2NPHASE((uintptr_t)addr +
				    phdr->p_filesz, PAGESIZE);

				if (zfodsz > tlen) {
					const caddr_t taddr = addr +
					    phdr->p_filesz + tlen;

					/*
					 * Since a hole in the AS large enough
					 * for this object as calculated by
					 * elfsize() is available, we do not
					 * need to fear overflow for 'taddr'.
					 */
					curproc->p_brkpageszc =
					    page_szc(map_pgsz(MAPPGSZ_HEAP,
					    curproc, taddr, zfodsz - tlen, 0));
				}
			}

			if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
			    (prot & PROT_WRITE)) {
				uint_t szc = curproc->p_brkpageszc;
				size_t pgsz = page_get_pagesize(szc);
				caddr_t ebss = addr + phdr->p_memsz;
				/*
				 * If we need extra space to keep the BSS an
				 * integral number of pages in size, some of
				 * that space may fall beyond p_brkbase, so we
				 * need to set p_brksize to account for it
				 * being (logically) part of the brk.
				 */
				size_t extra_zfodsz;

				ASSERT(pgsz > PAGESIZE);

				extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);

				if (error = execmap(vp, addr, phdr->p_filesz,
				    zfodsz + extra_zfodsz, phdr->p_offset,
				    prot, page, szc))
					goto bad;
				if (brksize != NULL)
					*brksize = extra_zfodsz;
			} else {
				if (error = execmap(vp, addr, phdr->p_filesz,
				    zfodsz, phdr->p_offset, prot, page, 0))
					goto bad;
			}

			if (bssbase != NULL && addr >= *bssbase &&
			    phdr == dataphdrp) {
				*bssbase = addr + phdr->p_filesz;
			}
			if (brkbase != NULL && addr >= *brkbase) {
				*brkbase = addr + phdr->p_memsz;
			}

			/* Account for this segment, guarding the page sum. */
			memsz = btopr(phdr->p_memsz);
			if ((*execsz + memsz) < *execsz) {
				error = ENOMEM;
				goto bad;
			}
			*execsz += memsz;
			break;

		case PT_INTERP:
			/*
			 * The ELF specification is unequivocal about the
			 * PT_INTERP program header with respect to any PT_LOAD
			 * program header:  "If it is present, it must precede
			 * any loadable segment entry."  Linux, however, makes
			 * no attempt to enforce this -- which has allowed some
			 * binary editing tools to get away with generating
			 * invalid ELF binaries in the respect that PT_INTERP
			 * occurs after the first PT_LOAD program header.  This
			 * is unfortunate (and of course, disappointing) but
			 * it's no worse than that:  there is no reason that we
			 * can't process the PT_INTERP entry (if present) after
			 * one or more PT_LOAD entries.  We therefore
			 * deliberately do not check ptload here and always
			 * store dyphdr to be the PT_INTERP program header.
			 */
			*intphdr = phdr;
			break;

		case PT_SHLIB:
			*stphdr = phdr;
			break;

		case PT_PHDR:
			/*
			 * PT_PHDR must precede any PT_LOAD segment, and a
			 * zero p_flags is reserved to mark the artificial
			 * header constructed above -- reject either case.
			 *
			 * NOTE(review): 'error' is not assigned on this
			 * path before 'goto bad'; 'bad' only normalizes a
			 * zero value to EINVAL -- verify intent.
			 */
			if (ptload || phdr->p_flags == 0)
				goto bad;

			if (uphdr != NULL)
				*uphdr = phdr;

			break;

		case PT_NULL:
		case PT_DYNAMIC:
		case PT_NOTE:
			break;

		case PT_SUNWDTRACE:
			if (dtphdr != NULL)
				*dtphdr = phdr;
			break;

		default:
			break;
		}
		phdr = (Phdr *)((caddr_t)phdr + hsize);
	}

	if (minaddrp != NULL) {
		ASSERT(minaddr != (caddr_t)UINTPTR_MAX);
		*minaddrp = (uintptr_t)minaddr;
	}

	if (brkbase != NULL && secflag_enabled(curproc, PROC_SEC_ASLR)) {
		size_t off;
		uintptr_t base = (uintptr_t)*brkbase;
		uintptr_t oend = base + *brksize;

		ASSERT(ISP2(aslr_max_brk_skew));

		/* Skew the brk base by a random, page-aligned amount. */
		(void) random_get_pseudo_bytes((uint8_t *)&off, sizeof (off));
		base += P2PHASE(off, aslr_max_brk_skew);
		base = P2ROUNDUP(base, PAGESIZE);
		*brkbase = (caddr_t)base;
		/*
		 * Above, we set *brksize to account for the possibility we
		 * had to grow the 'brk' in padding out the BSS to a page
		 * boundary.
		 *
		 * We now need to adjust that based on where we now are
		 * actually putting the brk.
		 */
		if (oend > base)
			*brksize = oend - base;
		else
			*brksize = 0;
	}

	return (0);
bad:
	if (error == 0)
		error = EINVAL;
	return (error);
}
1890 1827
1891 1828 int
1892 1829 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1893 1830 rlim64_t rlimit, cred_t *credp)
1894 1831 {
1895 1832 Note note;
1896 1833 int error;
1897 1834
1898 1835 	bzero(&note, sizeof (note));
1899 1836 bcopy("CORE", note.name, 4);
1900 1837 note.nhdr.n_type = type;
1901 1838 /*
1902 1839 * The System V ABI states that n_namesz must be the length of the
1903 1840 * string that follows the Nhdr structure including the terminating
1904 1841 * null. The ABI also specifies that sufficient padding should be
1905 1842 * included so that the description that follows the name string
1906 1843 * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
1907 1844 * respectively. However, since this change was not made correctly
1908 1845 * at the time of the 64-bit port, both 32- and 64-bit binaries
1909 1846 * descriptions are only guaranteed to begin on a 4-byte boundary.
1910 1847 */
1911 1848 note.nhdr.n_namesz = 5;
1912 1849 note.nhdr.n_descsz = roundup(descsz, sizeof (Word));
1913 1850
1914 1851 	if (error = core_write(vp, UIO_SYSSPACE, *offsetp, &note,
1915 1852 sizeof (note), rlimit, credp))
1916 1853 return (error);
1917 1854
1918 1855 *offsetp += sizeof (note);
1919 1856
1920 1857 if (error = core_write(vp, UIO_SYSSPACE, *offsetp, desc,
1921 1858 note.nhdr.n_descsz, rlimit, credp))
1922 1859 return (error);
1923 1860
1924 1861 *offsetp += note.nhdr.n_descsz;
1925 1862 return (0);
1926 1863 }
1927 1864
1928 1865
1929 1866 /*
1930 1867 * Copy the section data from one vnode to the section of another vnode.
1931 1868 */
1932 1869 static void
1933 1870 elf_copy_scn(elf_core_ctx_t *ctx, const Shdr *src, vnode_t *src_vp, Shdr *dst)
1934 1871 {
1935 1872 size_t n = src->sh_size;
1936 1873 u_offset_t off = 0;
1937 1874 const u_offset_t soff = src->sh_offset;
1938 1875 const u_offset_t doff = ctx->ecc_doffset;
1939 1876 void *buf = ctx->ecc_buf;
1940 1877 vnode_t *dst_vp = ctx->ecc_vp;
1941 1878 cred_t *credp = ctx->ecc_credp;
1942 1879
1943 1880 /* Protect the copy loop below from overflow on the offsets */
1944 1881 if (n > OFF_MAX || (n + soff) > OFF_MAX || (n + doff) > OFF_MAX ||
1945 1882 (n + soff) < n || (n + doff) < n) {
1946 1883 dst->sh_size = 0;
1947 1884 dst->sh_offset = 0;
1948 1885 return;
1949 1886 }
1950 1887
1951 1888 while (n != 0) {
1952 1889 const size_t len = MIN(ctx->ecc_bufsz, n);
1953 1890 ssize_t resid;
1954 1891
1955 1892 if (vn_rdwr(UIO_READ, src_vp, buf, (ssize_t)len,
1956 1893 (offset_t)(soff + off),
1957 1894 UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid) != 0 ||
1958 1895 resid >= len || resid < 0 ||
1959 1896 core_write(dst_vp, UIO_SYSSPACE, (offset_t)(doff + off),
1960 1897 buf, len - resid, ctx->ecc_rlimit, credp) != 0) {
1961 1898 dst->sh_size = 0;
1962 1899 dst->sh_offset = 0;
1963 1900 return;
1964 1901 }
1965 1902
1966 1903 ASSERT(n >= len - resid);
|
↓ open down ↓ |
1781 lines elided |
↑ open up ↑ |
1967 1904
1968 1905 n -= len - resid;
1969 1906 off += len - resid;
1970 1907 }
1971 1908
1972 1909 ctx->ecc_doffset += src->sh_size;
1973 1910 }
1974 1911
1975 1912 /*
1976 1913 * Walk sections for a given ELF object, counting (or copying) those of
1977 - * interest (CTF, symtab, strtab).
1914 + * interest (CTF, symtab, strtab, DWARF debug).
1915 + *
1916 + * Returns UINT_MAX upon low-memory.
1978 1917 */
1979 1918 static uint_t
1980 1919 elf_process_obj_scns(elf_core_ctx_t *ctx, vnode_t *mvp, caddr_t saddr,
1981 1920 Shdr *v, uint_t idx, uint_t remain, shstrtab_t *shstrtab)
1982 1921 {
1983 1922 Ehdr ehdr;
1984 1923 const core_content_t content = ctx->ecc_content;
1985 1924 cred_t *credp = ctx->ecc_credp;
1986 1925 Shdr *ctf = NULL, *symtab = NULL, *strtab = NULL;
1987 1926 uintptr_t off = 0;
1988 1927 uint_t nshdrs, shstrndx, nphdrs, count = 0;
1989 1928 u_offset_t *doffp = &ctx->ecc_doffset;
1990 1929 boolean_t ctf_link = B_FALSE;
1991 1930 caddr_t shbase;
1992 1931 size_t shsize, shstrsize;
1993 1932 char *shstrbase;
1994 1933
1995 - if ((content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) == 0) {
1934 + if ((content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB | CC_CONTENT_DEBUG))
1935 + == 0) {
1996 1936 return (0);
1997 1937 }
1998 1938
1999 1939 if (getelfhead(mvp, credp, &ehdr, &nshdrs, &shstrndx, &nphdrs) != 0 ||
2000 1940 getelfshdr(mvp, credp, &ehdr, nshdrs, shstrndx, &shbase, &shsize,
2001 1941 &shstrbase, &shstrsize) != 0) {
2002 1942 return (0);
2003 1943 }
2004 1944
2005 1945 /* Starting at index 1 skips SHT_NULL which is expected at index 0 */
2006 1946 off = ehdr.e_shentsize;
2007 1947 for (uint_t i = 1; i < nshdrs; i++, off += ehdr.e_shentsize) {
2008 1948 Shdr *shdr, *symchk = NULL, *strchk;
2009 1949 const char *name;
2010 1950
2011 1951 shdr = (Shdr *)(shbase + off);
2012 1952 if (shdr->sh_name >= shstrsize || shdr->sh_type == SHT_NULL)
2013 1953 continue;
2014 1954
2015 1955 name = shstrbase + shdr->sh_name;
2016 1956
2017 1957 if (ctf == NULL &&
2018 1958 (content & CC_CONTENT_CTF) != 0 &&
2019 1959 strcmp(name, shstrtab_data[STR_CTF]) == 0) {
2020 1960 ctf = shdr;
2021 1961 if (ctf->sh_link != 0 && ctf->sh_link < nshdrs) {
2022 1962 /* check linked symtab below */
|
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
2023 1963 symchk = (Shdr *)(shbase +
2024 1964 shdr->sh_link * ehdr.e_shentsize);
2025 1965 ctf_link = B_TRUE;
2026 1966 } else {
2027 1967 continue;
2028 1968 }
2029 1969 } else if (symtab == NULL &&
2030 1970 (content & CC_CONTENT_SYMTAB) != 0 &&
2031 1971 strcmp(name, shstrtab_data[STR_SYMTAB]) == 0) {
2032 1972 symchk = shdr;
1973 + } else if ((content & CC_CONTENT_DEBUG) != 0 &&
1974 + strncmp(name, ".debug_", strlen(".debug_")) == 0) {
1975 + /*
1976 + * The design of the above check is intentional. In
1977 + * particular, we want to capture any sections that
1978 + * begin with '.debug_' for a few reasons:
1979 + *
1980 + * 1) Various revisions to the DWARF spec end up
1981 + * changing the set of section headers that
1982 + * exist. This ensures that we don't need to change
1983 + * the kernel to get a new version.
1984 + *
1985 + * 2) Other software uses .debug_ sections for things
1986 + * which aren't DWARF. This allows them to be captured
1987 + * as well.
1988 + *
1989 + * Because of this, we emit straight here, unlike the
1990 + * other two sections where we wait until we're done
1991 + * scanning.
1992 + */
1993 +
1994 + /* We're only counting, don't emit! */
1995 + if (v == NULL) {
1996 + count++;
1997 + continue;
1998 + }
1999 +
2000 + elf_ctx_resize_scratch(ctx, shdr->sh_size);
2001 + if (!shstrtab_ndx(shstrtab, name, &v[idx].sh_name)) {
2002 + count = UINT_MAX;
2003 + goto done;
2004 + }
2005 + v[idx].sh_addr = (Addr)(uintptr_t)saddr;
2006 + v[idx].sh_type = shdr->sh_type;
2007 + v[idx].sh_addralign = shdr->sh_addralign;
2008 + *doffp = roundup(*doffp, v[idx].sh_addralign);
2009 + v[idx].sh_offset = *doffp;
2010 + v[idx].sh_size = shdr->sh_size;
2011 + v[idx].sh_link = 0;
2012 + v[idx].sh_entsize = shdr->sh_entsize;
2013 + v[idx].sh_info = shdr->sh_info;
2014 +
2015 + elf_copy_scn(ctx, shdr, mvp, &v[idx]);
2016 + count++;
2017 + idx++;
2018 + continue;
2033 2019 } else {
2034 2020 continue;
2035 2021 }
2036 2022
2037 2023 ASSERT(symchk != NULL);
2038 2024 if ((symchk->sh_type != SHT_DYNSYM &&
2039 2025 symchk->sh_type != SHT_SYMTAB) ||
2040 2026 symchk->sh_link == 0 || symchk->sh_link >= nshdrs) {
2041 2027 ctf_link = B_FALSE;
2042 2028 continue;
2043 2029 }
2044 2030 strchk = (Shdr *)(shbase + symchk->sh_link * ehdr.e_shentsize);
2045 2031 if (strchk->sh_type != SHT_STRTAB) {
2046 2032 ctf_link = B_FALSE;
2047 2033 continue;
2048 2034 }
2049 2035 symtab = symchk;
2050 2036 strtab = strchk;
2051 2037
2052 - if (symtab != NULL && ctf != NULL) {
2038 + if (symtab != NULL && ctf != NULL &&
2039 + (content & CC_CONTENT_DEBUG) == 0) {
2053 2040 /* No other shdrs are of interest at this point */
2054 2041 break;
2055 2042 }
2056 2043 }
2057 2044
2058 2045 if (ctf != NULL)
2059 2046 count += 1;
2060 2047 if (symtab != NULL)
2061 2048 count += 2;
2049 +
2062 2050 if (v == NULL || count == 0 || count > remain) {
2063 2051 count = MIN(count, remain);
2064 2052 goto done;
2065 2053 }
2066 2054
2067 2055 /* output CTF section */
2068 2056 if (ctf != NULL) {
2069 2057 elf_ctx_resize_scratch(ctx, ctf->sh_size);
2070 2058
2071 - v[idx].sh_name = shstrtab_ndx(shstrtab, STR_CTF);
2059 + if (!shstrtab_ndx(shstrtab, shstrtab_data[STR_CTF],
2060 + &v[idx].sh_name)) {
2061 + count = UINT_MAX;
2062 + goto done;
2063 + }
2064 +
2072 2065 v[idx].sh_addr = (Addr)(uintptr_t)saddr;
2073 2066 v[idx].sh_type = SHT_PROGBITS;
2074 2067 v[idx].sh_addralign = 4;
2075 2068 *doffp = roundup(*doffp, v[idx].sh_addralign);
2076 2069 v[idx].sh_offset = *doffp;
2077 2070 v[idx].sh_size = ctf->sh_size;
2078 2071
2079 2072 if (ctf_link) {
2080 2073 /*
2081 2074 * The linked symtab (and strtab) will be output
2082 2075 * immediately after this CTF section. Its shdr index
2083 2076 * directly follows this one.
2084 2077 */
2085 2078 v[idx].sh_link = idx + 1;
2086 2079 ASSERT(symtab != NULL);
2087 2080 } else {
2088 2081 v[idx].sh_link = 0;
2089 2082 }
2090 2083 elf_copy_scn(ctx, ctf, mvp, &v[idx]);
2091 2084 idx++;
|
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
2092 2085 }
2093 2086
2094 2087 /* output SYMTAB/STRTAB sections */
2095 2088 if (symtab != NULL) {
2096 2089 uint_t symtab_name, strtab_name;
2097 2090
2098 2091 elf_ctx_resize_scratch(ctx,
2099 2092 MAX(symtab->sh_size, strtab->sh_size));
2100 2093
2101 2094 if (symtab->sh_type == SHT_DYNSYM) {
2102 - symtab_name = shstrtab_ndx(shstrtab, STR_DYNSYM);
2103 - strtab_name = shstrtab_ndx(shstrtab, STR_DYNSTR);
2095 + if (!shstrtab_ndx(shstrtab, shstrtab_data[STR_DYNSYM],
2096 + &symtab_name) ||
2097 + !shstrtab_ndx(shstrtab, shstrtab_data[STR_DYNSTR],
2098 + &strtab_name)) {
2099 + count = UINT_MAX;
2100 + goto done;
2101 + }
2104 2102 } else {
2105 - symtab_name = shstrtab_ndx(shstrtab, STR_SYMTAB);
2106 - strtab_name = shstrtab_ndx(shstrtab, STR_STRTAB);
2103 + if (!shstrtab_ndx(shstrtab, shstrtab_data[STR_SYMTAB],
2104 + &symtab_name) ||
2105 + !shstrtab_ndx(shstrtab, shstrtab_data[STR_STRTAB],
2106 + &strtab_name)) {
2107 + count = UINT_MAX;
2108 + goto done;
2109 + }
2107 2110 }
2108 2111
2109 2112 v[idx].sh_name = symtab_name;
2110 2113 v[idx].sh_type = symtab->sh_type;
2111 2114 v[idx].sh_addr = symtab->sh_addr;
2112 2115 if (ehdr.e_type == ET_DYN || v[idx].sh_addr == 0)
2113 2116 v[idx].sh_addr += (Addr)(uintptr_t)saddr;
2114 2117 v[idx].sh_addralign = symtab->sh_addralign;
2115 2118 *doffp = roundup(*doffp, v[idx].sh_addralign);
2116 2119 v[idx].sh_offset = *doffp;
2117 2120 v[idx].sh_size = symtab->sh_size;
2118 2121 v[idx].sh_link = idx + 1;
2119 2122 v[idx].sh_entsize = symtab->sh_entsize;
2120 2123 v[idx].sh_info = symtab->sh_info;
2121 2124
2122 2125 elf_copy_scn(ctx, symtab, mvp, &v[idx]);
2123 2126 idx++;
2124 2127
2125 2128 v[idx].sh_name = strtab_name;
2126 2129 v[idx].sh_type = SHT_STRTAB;
2127 2130 v[idx].sh_flags = SHF_STRINGS;
2128 2131 v[idx].sh_addr = strtab->sh_addr;
2129 2132 if (ehdr.e_type == ET_DYN || v[idx].sh_addr == 0)
2130 2133 v[idx].sh_addr += (Addr)(uintptr_t)saddr;
2131 2134 v[idx].sh_addralign = strtab->sh_addralign;
2132 2135 *doffp = roundup(*doffp, v[idx].sh_addralign);
2133 2136 v[idx].sh_offset = *doffp;
2134 2137 v[idx].sh_size = strtab->sh_size;
2135 2138
2136 2139 elf_copy_scn(ctx, strtab, mvp, &v[idx]);
2137 2140 idx++;
2138 2141 }
2139 2142
2140 2143 done:
2141 2144 kmem_free(shstrbase, shstrsize);
2142 2145 kmem_free(shbase, shsize);
2143 2146 return (count);
2144 2147 }
2145 2148
2146 2149 /*
2147 2150 * Walk mappings in process address space, examining those which correspond to
2148 2151 * loaded objects. It is called twice from elfcore: Once to simply count
2149 2152 * relevant sections, and again later to copy those sections once an adequate
2150 2153 * buffer has been allocated for the shdr details.
2151 2154 */
2152 2155 static int
2153 2156 elf_process_scns(elf_core_ctx_t *ctx, Shdr *v, uint_t nv, uint_t *nshdrsp)
2154 2157 {
2155 2158 vnode_t *lastvp = NULL;
2156 2159 struct seg *seg;
|
↓ open down ↓ |
40 lines elided |
↑ open up ↑ |
2157 2160 uint_t idx = 0, remain;
2158 2161 shstrtab_t shstrtab;
2159 2162 struct as *as = ctx->ecc_p->p_as;
2160 2163 int error = 0;
2161 2164
2162 2165 ASSERT(AS_WRITE_HELD(as));
2163 2166
2164 2167 if (v != NULL) {
2165 2168 ASSERT(nv != 0);
2166 2169
2167 - shstrtab_init(&shstrtab);
2170 + if (!shstrtab_init(&shstrtab))
2171 + return (ENOMEM);
2168 2172 remain = nv;
2169 2173 } else {
2170 2174 ASSERT(nv == 0);
2171 2175
2172 2176 /*
2173 2177 * The shdrs are being counted, rather than outputting them
2174 2178 * into a buffer. Leave room for two entries: the SHT_NULL at
2175 2179 * index 0 and the shstrtab at the end.
2176 2180 */
2177 2181 remain = UINT_MAX - 2;
2178 2182 }
2179 2183
2180 2184 /* Per the ELF spec, shdr index 0 is reserved. */
2181 2185 idx = 1;
2182 2186 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2183 2187 vnode_t *mvp;
2184 2188 void *tmp = NULL;
2185 2189 caddr_t saddr = seg->s_base, naddr, eaddr;
2186 2190 size_t segsize;
2187 2191 uint_t count, prot;
2188 2192
2189 2193 /*
2190 2194 * Since we're just looking for text segments of load
2191 2195 * objects, we only care about the protection bits; we don't
2192 2196 * care about the actual size of the segment so we use the
2193 2197 * reserved size. If the segment's size is zero, there's
2194 2198 * something fishy going on so we ignore this segment.
2195 2199 */
2196 2200 if (seg->s_ops != &segvn_ops ||
2197 2201 SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
2198 2202 mvp == lastvp || mvp == NULL || mvp->v_type != VREG ||
2199 2203 (segsize = pr_getsegsize(seg, 1)) == 0)
2200 2204 continue;
2201 2205
2202 2206 eaddr = saddr + segsize;
2203 2207 prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr);
2204 2208 pr_getprot_done(&tmp);
|
↓ open down ↓ |
27 lines elided |
↑ open up ↑ |
2205 2209
2206 2210 /*
2207 2211 * Skip this segment unless the protection bits look like
2208 2212 * what we'd expect for a text segment.
2209 2213 */
2210 2214 if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)
2211 2215 continue;
2212 2216
2213 2217 count = elf_process_obj_scns(ctx, mvp, saddr, v, idx, remain,
2214 2218 &shstrtab);
2219 + if (count == UINT_MAX) {
2220 + error = ENOMEM;
2221 + goto done;
2222 + }
2215 2223
2216 2224 ASSERT(count <= remain);
2217 2225 ASSERT(v == NULL || (idx + count) < nv);
2218 2226
2219 2227 remain -= count;
2220 2228 idx += count;
2221 2229 lastvp = mvp;
2222 2230 }
2223 2231
2224 2232 if (v == NULL) {
2225 2233 if (idx == 1) {
2226 2234 *nshdrsp = 0;
2227 2235 } else {
2228 2236 /* Include room for the shrstrtab at the end */
2229 2237 *nshdrsp = idx + 1;
2230 2238 }
2239 + /* No need to free up shstrtab so we can just return. */
2231 2240 return (0);
2232 2241 }
2233 2242
2234 2243 if (idx != nv - 1) {
2235 2244 cmn_err(CE_WARN, "elfcore: core dump failed for "
2236 2245 "process %d; address space is changing",
2237 2246 ctx->ecc_p->p_pid);
2238 - return (EIO);
2247 + error = EIO;
2248 + goto done;
2239 2249 }
2240 2250
2241 - v[idx].sh_name = shstrtab_ndx(&shstrtab, STR_SHSTRTAB);
2251 + if (!shstrtab_ndx(&shstrtab, shstrtab_data[STR_SHSTRTAB],
2252 + &v[idx].sh_name)) {
2253 + error = ENOMEM;
2254 + goto done;
2255 + }
2242 2256 v[idx].sh_size = shstrtab_size(&shstrtab);
2243 2257 v[idx].sh_addralign = 1;
2244 2258 v[idx].sh_offset = ctx->ecc_doffset;
2245 2259 v[idx].sh_flags = SHF_STRINGS;
2246 2260 v[idx].sh_type = SHT_STRTAB;
2247 2261
2248 2262 elf_ctx_resize_scratch(ctx, v[idx].sh_size);
2249 2263 VERIFY3U(ctx->ecc_bufsz, >=, v[idx].sh_size);
2250 2264 shstrtab_dump(&shstrtab, ctx->ecc_buf);
2251 2265
2252 2266 error = core_write(ctx->ecc_vp, UIO_SYSSPACE, ctx->ecc_doffset,
2253 2267 ctx->ecc_buf, v[idx].sh_size, ctx->ecc_rlimit, ctx->ecc_credp);
2254 2268 if (error == 0) {
2255 2269 ctx->ecc_doffset += v[idx].sh_size;
2256 2270 }
2257 2271
2272 +done:
2273 + if (v != NULL)
2274 + shstrtab_fini(&shstrtab);
2258 2275 return (error);
2259 2276 }
2260 2277
2261 2278 int
2262 2279 elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig,
2263 2280 core_content_t content)
2264 2281 {
2265 2282 u_offset_t poffset, soffset, doffset;
2266 2283 int error;
2267 2284 uint_t i, nphdrs, nshdrs;
2268 2285 struct seg *seg;
2269 2286 struct as *as = p->p_as;
2270 2287 void *bigwad, *zeropg = NULL;
2271 2288 size_t bigsize, phdrsz, shdrsz;
2272 2289 Ehdr *ehdr;
2273 2290 Phdr *phdr;
2274 2291 Shdr shdr0;
2275 2292 caddr_t brkbase, stkbase;
2276 2293 size_t brksize, stksize;
2277 2294 boolean_t overflowed = B_FALSE, retried = B_FALSE;
2278 2295 klwp_t *lwp = ttolwp(curthread);
2279 2296 elf_core_ctx_t ctx = {
2280 2297 .ecc_vp = vp,
2281 2298 .ecc_p = p,
2282 2299 .ecc_credp = credp,
2283 2300 .ecc_rlimit = rlimit,
2284 2301 .ecc_content = content,
2285 2302 .ecc_doffset = 0,
2286 2303 .ecc_buf = NULL,
2287 2304 .ecc_bufsz = 0
2288 2305 };
2289 2306
2290 2307 top:
2291 2308 /*
2292 2309 * Make sure we have everything we need (registers, etc.).
2293 2310 * All other lwps have already stopped and are in an orderly state.
2294 2311 */
|
↓ open down ↓ |
27 lines elided |
↑ open up ↑ |
2295 2312 ASSERT(p == ttoproc(curthread));
2296 2313 prstop(0, 0);
2297 2314
2298 2315 AS_LOCK_ENTER(as, RW_WRITER);
2299 2316 nphdrs = prnsegs(as, 0) + 2; /* two CORE note sections */
2300 2317
2301 2318 /*
2302 2319 * Count the number of section headers we're going to need.
2303 2320 */
2304 2321 nshdrs = 0;
2305 - if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) {
2322 + if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB | CC_CONTENT_DEBUG))
2306 2323 VERIFY0(elf_process_scns(&ctx, NULL, 0, &nshdrs));
2307 - }
2308 2324 AS_LOCK_EXIT(as);
2309 2325
2310 2326 /*
2311 2327 * The core file contents may require zero section headers, but if
2312 2328 * we overflow the 16 bits allotted to the program header count in
2313 2329 * the ELF header, we'll need that program header at index zero.
2314 2330 */
2315 2331 if (nshdrs == 0 && nphdrs >= PN_XNUM) {
2316 2332 nshdrs = 1;
2317 2333 }
2318 2334
2319 2335 /*
2320 - * Allocate a buffer which is sized adequately to hold the ehdr, phdrs
2321 - * or shdrs needed to produce the core file. It is used for the three
2322 - * tasks sequentially, not simultaneously, so it does not need space
2323 - * for all three data at once, only the largest one.
2336 + * Allocate a buffer which is sized adequately to hold the ehdr,
2337 + * phdrs, DWARF debug, or shdrs needed to produce the core file. It
2338 + * is used for the four tasks sequentially, not simultaneously, so it
2339 + * does not need space for all four data at once, only the largest
2340 + * one.
2324 2341 */
2325 2342 VERIFY(nphdrs >= 2);
2326 2343 phdrsz = nphdrs * sizeof (Phdr);
2327 2344 shdrsz = nshdrs * sizeof (Shdr);
2328 2345 bigsize = MAX(sizeof (Ehdr), MAX(phdrsz, shdrsz));
2329 2346 bigwad = kmem_alloc(bigsize, KM_SLEEP);
2330 2347
2331 2348 ehdr = (Ehdr *)bigwad;
2332 2349 bzero(ehdr, sizeof (*ehdr));
2333 2350
2334 2351 ehdr->e_ident[EI_MAG0] = ELFMAG0;
2335 2352 ehdr->e_ident[EI_MAG1] = ELFMAG1;
2336 2353 ehdr->e_ident[EI_MAG2] = ELFMAG2;
2337 2354 ehdr->e_ident[EI_MAG3] = ELFMAG3;
2338 2355 ehdr->e_ident[EI_CLASS] = ELFCLASS;
2339 2356 ehdr->e_type = ET_CORE;
2340 2357
2341 2358 #if !defined(_LP64) || defined(_ELF32_COMPAT)
2342 2359
2343 2360 #if defined(__sparc)
2344 2361 ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
2345 2362 ehdr->e_machine = EM_SPARC;
2346 2363 #elif defined(__i386_COMPAT)
2347 2364 ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
2348 2365 ehdr->e_machine = EM_386;
2349 2366 #else
2350 2367 #error "no recognized machine type is defined"
2351 2368 #endif
2352 2369
2353 2370 #else /* !defined(_LP64) || defined(_ELF32_COMPAT) */
2354 2371
2355 2372 #if defined(__sparc)
2356 2373 ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
2357 2374 ehdr->e_machine = EM_SPARCV9;
2358 2375 #elif defined(__amd64)
2359 2376 ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
2360 2377 ehdr->e_machine = EM_AMD64;
2361 2378 #else
2362 2379 #error "no recognized 64-bit machine type is defined"
2363 2380 #endif
2364 2381
2365 2382 #endif /* !defined(_LP64) || defined(_ELF32_COMPAT) */
2366 2383
2367 2384 poffset = sizeof (Ehdr);
2368 2385 soffset = sizeof (Ehdr) + phdrsz;
2369 2386 doffset = sizeof (Ehdr) + phdrsz + shdrsz;
2370 2387 bzero(&shdr0, sizeof (shdr0));
2371 2388
2372 2389 /*
2373 2390 * If the count of program headers or section headers or the index
2374 2391 * of the section string table can't fit in the mere 16 bits
2375 2392 * shortsightedly allotted to them in the ELF header, we use the
2376 2393 * extended formats and put the real values in the section header
2377 2394 * as index 0.
2378 2395 */
2379 2396 if (nphdrs >= PN_XNUM) {
2380 2397 ehdr->e_phnum = PN_XNUM;
2381 2398 shdr0.sh_info = nphdrs;
2382 2399 } else {
2383 2400 ehdr->e_phnum = (unsigned short)nphdrs;
2384 2401 }
2385 2402
2386 2403 if (nshdrs > 0) {
2387 2404 if (nshdrs >= SHN_LORESERVE) {
2388 2405 ehdr->e_shnum = 0;
2389 2406 shdr0.sh_size = nshdrs;
2390 2407 } else {
2391 2408 ehdr->e_shnum = (unsigned short)nshdrs;
2392 2409 }
2393 2410
2394 2411 if (nshdrs - 1 >= SHN_LORESERVE) {
2395 2412 ehdr->e_shstrndx = SHN_XINDEX;
2396 2413 shdr0.sh_link = nshdrs - 1;
2397 2414 } else {
2398 2415 ehdr->e_shstrndx = (unsigned short)(nshdrs - 1);
2399 2416 }
2400 2417
2401 2418 ehdr->e_shoff = soffset;
2402 2419 ehdr->e_shentsize = sizeof (Shdr);
2403 2420 }
2404 2421
2405 2422 ehdr->e_ident[EI_VERSION] = EV_CURRENT;
2406 2423 ehdr->e_version = EV_CURRENT;
2407 2424 ehdr->e_ehsize = sizeof (Ehdr);
2408 2425 ehdr->e_phoff = poffset;
2409 2426 ehdr->e_phentsize = sizeof (Phdr);
2410 2427
2411 2428 if (error = core_write(vp, UIO_SYSSPACE, (offset_t)0, ehdr,
2412 2429 sizeof (Ehdr), rlimit, credp)) {
2413 2430 goto done;
2414 2431 }
2415 2432
2416 2433 phdr = (Phdr *)bigwad;
2417 2434 bzero(phdr, phdrsz);
2418 2435
2419 2436 setup_old_note_header(&phdr[0], p);
2420 2437 phdr[0].p_offset = doffset = roundup(doffset, sizeof (Word));
2421 2438 doffset += phdr[0].p_filesz;
2422 2439
2423 2440 setup_note_header(&phdr[1], p);
2424 2441 phdr[1].p_offset = doffset = roundup(doffset, sizeof (Word));
2425 2442 doffset += phdr[1].p_filesz;
2426 2443
2427 2444 mutex_enter(&p->p_lock);
2428 2445
2429 2446 brkbase = p->p_brkbase;
2430 2447 brksize = p->p_brksize;
2431 2448
2432 2449 stkbase = p->p_usrstack - p->p_stksize;
2433 2450 stksize = p->p_stksize;
2434 2451
2435 2452 mutex_exit(&p->p_lock);
2436 2453
2437 2454 AS_LOCK_ENTER(as, RW_WRITER);
2438 2455 i = 2;
2439 2456 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2440 2457 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2441 2458 caddr_t saddr, naddr;
2442 2459 void *tmp = NULL;
2443 2460 extern struct seg_ops segspt_shmops;
2444 2461
2445 2462 if ((seg->s_flags & S_HOLE) != 0) {
2446 2463 continue;
2447 2464 }
2448 2465
2449 2466 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2450 2467 uint_t prot;
2451 2468 size_t size;
2452 2469 int type;
2453 2470 vnode_t *mvp;
2454 2471
2455 2472 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2456 2473 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
2457 2474 if ((size = (size_t)(naddr - saddr)) == 0) {
2458 2475 ASSERT(tmp == NULL);
2459 2476 continue;
2460 2477 } else if (i == nphdrs) {
2461 2478 pr_getprot_done(&tmp);
2462 2479 overflowed = B_TRUE;
2463 2480 break;
2464 2481 }
2465 2482 phdr[i].p_type = PT_LOAD;
2466 2483 phdr[i].p_vaddr = (Addr)(uintptr_t)saddr;
2467 2484 phdr[i].p_memsz = size;
2468 2485 if (prot & PROT_READ)
2469 2486 phdr[i].p_flags |= PF_R;
2470 2487 if (prot & PROT_WRITE)
2471 2488 phdr[i].p_flags |= PF_W;
2472 2489 if (prot & PROT_EXEC)
2473 2490 phdr[i].p_flags |= PF_X;
2474 2491
2475 2492 /*
2476 2493 * Figure out which mappings to include in the core.
2477 2494 */
2478 2495 type = SEGOP_GETTYPE(seg, saddr);
2479 2496
2480 2497 if (saddr == stkbase && size == stksize) {
2481 2498 if (!(content & CC_CONTENT_STACK))
2482 2499 goto exclude;
2483 2500
2484 2501 } else if (saddr == brkbase && size == brksize) {
2485 2502 if (!(content & CC_CONTENT_HEAP))
2486 2503 goto exclude;
2487 2504
2488 2505 } else if (seg->s_ops == &segspt_shmops) {
2489 2506 if (type & MAP_NORESERVE) {
2490 2507 if (!(content & CC_CONTENT_DISM))
2491 2508 goto exclude;
2492 2509 } else {
2493 2510 if (!(content & CC_CONTENT_ISM))
2494 2511 goto exclude;
2495 2512 }
2496 2513
2497 2514 } else if (seg->s_ops != &segvn_ops) {
2498 2515 goto exclude;
2499 2516
2500 2517 } else if (type & MAP_SHARED) {
2501 2518 if (shmgetid(p, saddr) != SHMID_NONE) {
2502 2519 if (!(content & CC_CONTENT_SHM))
2503 2520 goto exclude;
2504 2521
2505 2522 } else if (SEGOP_GETVP(seg, seg->s_base,
2506 2523 &mvp) != 0 || mvp == NULL ||
2507 2524 mvp->v_type != VREG) {
2508 2525 if (!(content & CC_CONTENT_SHANON))
2509 2526 goto exclude;
2510 2527
2511 2528 } else {
2512 2529 if (!(content & CC_CONTENT_SHFILE))
2513 2530 goto exclude;
2514 2531 }
2515 2532
2516 2533 } else if (SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
2517 2534 mvp == NULL || mvp->v_type != VREG) {
2518 2535 if (!(content & CC_CONTENT_ANON))
2519 2536 goto exclude;
2520 2537
2521 2538 } else if (prot == (PROT_READ | PROT_EXEC)) {
2522 2539 if (!(content & CC_CONTENT_TEXT))
2523 2540 goto exclude;
2524 2541
2525 2542 } else if (prot == PROT_READ) {
2526 2543 if (!(content & CC_CONTENT_RODATA))
2527 2544 goto exclude;
2528 2545
2529 2546 } else {
2530 2547 if (!(content & CC_CONTENT_DATA))
2531 2548 goto exclude;
2532 2549 }
2533 2550
2534 2551 doffset = roundup(doffset, sizeof (Word));
2535 2552 phdr[i].p_offset = doffset;
2536 2553 phdr[i].p_filesz = size;
2537 2554 doffset += size;
2538 2555 exclude:
2539 2556 i++;
2540 2557 }
2541 2558 VERIFY(tmp == NULL);
2542 2559 if (overflowed)
2543 2560 break;
2544 2561 }
2545 2562 AS_LOCK_EXIT(as);
2546 2563
2547 2564 if (overflowed || i != nphdrs) {
2548 2565 if (!retried) {
2549 2566 retried = B_TRUE;
2550 2567 overflowed = B_FALSE;
2551 2568 kmem_free(bigwad, bigsize);
2552 2569 goto top;
2553 2570 }
2554 2571 cmn_err(CE_WARN, "elfcore: core dump failed for "
2555 2572 "process %d; address space is changing", p->p_pid);
2556 2573 error = EIO;
2557 2574 goto done;
2558 2575 }
2559 2576
2560 2577 if ((error = core_write(vp, UIO_SYSSPACE, poffset,
2561 2578 phdr, phdrsz, rlimit, credp)) != 0) {
2562 2579 goto done;
2563 2580 }
2564 2581
2565 2582 if ((error = write_old_elfnotes(p, sig, vp, phdr[0].p_offset, rlimit,
2566 2583 credp)) != 0) {
2567 2584 goto done;
2568 2585 }
2569 2586 if ((error = write_elfnotes(p, sig, vp, phdr[1].p_offset, rlimit,
2570 2587 credp, content)) != 0) {
2571 2588 goto done;
2572 2589 }
2573 2590
2574 2591 for (i = 2; i < nphdrs; i++) {
2575 2592 prkillinfo_t killinfo;
2576 2593 sigqueue_t *sq;
2577 2594 int sig, j;
2578 2595
2579 2596 if (phdr[i].p_filesz == 0)
2580 2597 continue;
2581 2598
2582 2599 /*
2583 2600 * If we hit a region that was mapped PROT_NONE then we cannot
2584 2601 * continue dumping this normally as the kernel would be unable
2585 2602 * to read from the page and that would result in us failing to
2586 2603 * dump the page. As such, any region mapped PROT_NONE, we dump
2587 2604 * as a zero-filled page such that this is still represented in
2588 2605 * the map.
2589 2606 *
2590 2607 * If dumping out this segment fails, rather than failing
2591 2608 * the core dump entirely, we reset the size of the mapping
2592 2609 * to zero to indicate that the data is absent from the core
2593 2610 * file and or in the PF_SUNW_FAILURE flag to differentiate
2594 2611 * this from mappings that were excluded due to the core file
2595 2612 * content settings.
2596 2613 */
2597 2614 if ((phdr[i].p_flags & (PF_R | PF_W | PF_X)) == 0) {
2598 2615 size_t towrite = phdr[i].p_filesz;
2599 2616 size_t curoff = 0;
2600 2617
2601 2618 if (zeropg == NULL) {
2602 2619 zeropg = kmem_zalloc(elf_zeropg_sz, KM_SLEEP);
2603 2620 }
2604 2621
2605 2622 error = 0;
2606 2623 while (towrite != 0) {
2607 2624 size_t len = MIN(towrite, elf_zeropg_sz);
2608 2625
2609 2626 error = core_write(vp, UIO_SYSSPACE,
2610 2627 phdr[i].p_offset + curoff, zeropg, len,
2611 2628 rlimit, credp);
2612 2629 if (error != 0)
2613 2630 break;
2614 2631
2615 2632 towrite -= len;
2616 2633 curoff += len;
2617 2634 }
2618 2635 } else {
2619 2636 error = core_seg(p, vp, phdr[i].p_offset,
2620 2637 (caddr_t)(uintptr_t)phdr[i].p_vaddr,
2621 2638 phdr[i].p_filesz, rlimit, credp);
2622 2639 }
2623 2640 if (error == 0)
2624 2641 continue;
2625 2642
2626 2643 if ((sig = lwp->lwp_cursig) == 0) {
2627 2644 /*
2628 2645 * We failed due to something other than a signal.
2629 2646 * Since the space reserved for the segment is now
2630 2647 * unused, we stash the errno in the first four
2631 2648 * bytes. This undocumented interface will let us
2632 2649 * understand the nature of the failure.
2633 2650 */
2634 2651 (void) core_write(vp, UIO_SYSSPACE, phdr[i].p_offset,
2635 2652 &error, sizeof (error), rlimit, credp);
2636 2653
2637 2654 phdr[i].p_filesz = 0;
2638 2655 phdr[i].p_flags |= PF_SUNW_FAILURE;
2639 2656 if ((error = core_write(vp, UIO_SYSSPACE,
2640 2657 poffset + sizeof (Phdr) * i, &phdr[i],
2641 2658 sizeof (Phdr), rlimit, credp)) != 0)
2642 2659 goto done;
2643 2660
2644 2661 continue;
2645 2662 }
2646 2663
2647 2664 /*
2648 2665 * We took a signal. We want to abort the dump entirely, but
2649 2666 * we also want to indicate what failed and why. We therefore
2650 2667 * use the space reserved for the first failing segment to
2651 2668 * write our error (which, for purposes of compatability with
2652 2669 * older core dump readers, we set to EINTR) followed by any
2653 2670 * siginfo associated with the signal.
2654 2671 */
2655 2672 bzero(&killinfo, sizeof (killinfo));
2656 2673 killinfo.prk_error = EINTR;
2657 2674
2658 2675 sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo;
2659 2676
2660 2677 if (sq != NULL) {
2661 2678 bcopy(&sq->sq_info, &killinfo.prk_info,
2662 2679 sizeof (sq->sq_info));
2663 2680 } else {
2664 2681 killinfo.prk_info.si_signo = lwp->lwp_cursig;
2665 2682 killinfo.prk_info.si_code = SI_NOINFO;
2666 2683 }
2667 2684
2668 2685 #if (defined(_SYSCALL32_IMPL) || defined(_LP64))
2669 2686 /*
2670 2687 * If this is a 32-bit process, we need to translate from the
2671 2688 * native siginfo to the 32-bit variant. (Core readers must
2672 2689 * always have the same data model as their target or must
2673 2690 * be aware of -- and compensate for -- data model differences.)
2674 2691 */
2675 2692 if (curproc->p_model == DATAMODEL_ILP32) {
2676 2693 siginfo32_t si32;
2677 2694
2678 2695 siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32);
2679 2696 bcopy(&si32, &killinfo.prk_info, sizeof (si32));
2680 2697 }
2681 2698 #endif
2682 2699
2683 2700 (void) core_write(vp, UIO_SYSSPACE, phdr[i].p_offset,
2684 2701 &killinfo, sizeof (killinfo), rlimit, credp);
2685 2702
2686 2703 /*
2687 2704 * For the segment on which we took the signal, indicate that
2688 2705 * its data now refers to a siginfo.
2689 2706 */
2690 2707 phdr[i].p_filesz = 0;
2691 2708 phdr[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED |
2692 2709 PF_SUNW_SIGINFO;
2693 2710
2694 2711 /*
2695 2712 * And for every other segment, indicate that its absence
2696 2713 * is due to a signal.
2697 2714 */
2698 2715 for (j = i + 1; j < nphdrs; j++) {
2699 2716 phdr[j].p_filesz = 0;
2700 2717 phdr[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED;
2701 2718 }
2702 2719
2703 2720 /*
2704 2721 * Finally, write out our modified program headers.
2705 2722 */
2706 2723 if ((error = core_write(vp, UIO_SYSSPACE,
2707 2724 poffset + sizeof (Phdr) * i, &phdr[i],
2708 2725 sizeof (Phdr) * (nphdrs - i), rlimit, credp)) != 0) {
2709 2726 goto done;
2710 2727 }
2711 2728
2712 2729 break;
2713 2730 }
2714 2731
2715 2732 if (nshdrs > 0) {
2716 2733 Shdr *shdr = (Shdr *)bigwad;
2717 2734
2718 2735 bzero(shdr, shdrsz);
2719 2736 if (nshdrs > 1) {
2720 2737 ctx.ecc_doffset = doffset;
2721 2738 AS_LOCK_ENTER(as, RW_WRITER);
2722 2739 error = elf_process_scns(&ctx, shdr, nshdrs, NULL);
2723 2740 AS_LOCK_EXIT(as);
2724 2741 if (error != 0) {
2725 2742 goto done;
2726 2743 }
2727 2744 }
2728 2745 /* Copy any extended format data destined for the first shdr */
2729 2746 bcopy(&shdr0, shdr, sizeof (shdr0));
2730 2747
2731 2748 error = core_write(vp, UIO_SYSSPACE, soffset, shdr, shdrsz,
2732 2749 rlimit, credp);
2733 2750 }
2734 2751
2735 2752 done:
2736 2753 if (zeropg != NULL)
2737 2754 kmem_free(zeropg, elf_zeropg_sz);
2738 2755 if (ctx.ecc_bufsz != 0) {
2739 2756 kmem_free(ctx.ecc_buf, ctx.ecc_bufsz);
2740 2757 }
2741 2758 kmem_free(bigwad, bigsize);
2742 2759 return (error);
2743 2760 }
2744 2761
2745 2762 #ifndef _ELF32_COMPAT
2746 2763
/*
 * exec-switch entry for the native ELF class.  Fields: the magic string
 * to match at the start of an executable, followed by what are presumably
 * the magic's offset (0) and length (5) -- TODO confirm against
 * struct execsw -- and the exec and core-dump handlers.  On an LP64
 * kernel this entry handles 64-bit ELF objects; otherwise 32-bit.
 */
2747 2764 static struct execsw esw = {
2748 2765 #ifdef _LP64
2749 2766 	elf64magicstr,
2750 2767 #else /* _LP64 */
2751 2768 	elf32magicstr,
2752 2769 #endif /* _LP64 */
2753 2770 	0,
2754 2771 	5,
2755 2772 	elfexec,
2756 2773 	elfcore
2757 2774 };
2758 2775
/*
 * Module linkage element tying the native-class exec switch entry (esw)
 * into the kernel's exec framework.
 */
2759 2776 static struct modlexec modlexec = {
2760 2777 	&mod_execops, "exec module for elf", &esw
2761 2778 };
2762 2779
/*
 * On an LP64 kernel we additionally register a second exec switch entry
 * so 32-bit ELF executables can still be run (and can still dump core).
 * elf32exec/elf32core are the ILP32 variants of elfexec/elfcore, built
 * elsewhere (presumably via the _ELF32_COMPAT recompilation of this
 * file -- see the #ifndef _ELF32_COMPAT guard around this section).
 */
2763 2780 #ifdef _LP64
2764 2781 extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
2765 2782 			intpdata_t *idatap, int level, size_t *execsz,
2766 2783 			int setid, caddr_t exec_file, cred_t *cred,
2767 2784 			int *brand_action);
2768 2785 extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
2769 2786 			rlim64_t rlimit, int sig, core_content_t content);
2770 2787
2771 2788 static struct execsw esw32 = {
2772 2789 	elf32magicstr,
2773 2790 	0,
2774 2791 	5,
2775 2792 	elf32exec,
2776 2793 	elf32core
2777 2794 };
2778 2795
/* Module linkage element for the 32-bit compatibility exec entry. */
2779 2796 static struct modlexec modlexec32 = {
2780 2797 	&mod_execops, "32-bit exec module for elf", &esw32
2781 2798 };
2782 2799 #endif /* _LP64 */
2783 2800
/*
 * Top-level module linkage: the native exec entry always, plus the
 * 32-bit compatibility entry on LP64 kernels.  The list is
 * NULL-terminated as required by the module framework.
 */
2784 2801 static struct modlinkage modlinkage = {
2785 2802 	MODREV_1,
2786 2803 	(void *)&modlexec,
2787 2804 #ifdef _LP64
2788 2805 	(void *)&modlexec32,
2789 2806 #endif /* _LP64 */
2790 2807 	NULL
2791 2808 };
2792 2809
/*
 * Loadable-module entry point: install this exec module's linkage.
 * Returns 0 on success or an errno value from mod_install().
 */
2793 2810 int
2794 2811 _init(void)
2795 2812 {
2796 2813 	return (mod_install(&modlinkage));
2797 2814 }
2798 2815
/*
 * Loadable-module exit point: remove the module linkage.  mod_remove()
 * fails (non-zero) if the module is still busy.
 */
2799 2816 int
2800 2817 _fini(void)
2801 2818 {
2802 2819 	return (mod_remove(&modlinkage));
2803 2820 }
2804 2821
/*
 * Loadable-module info entry point: report module information via
 * mod_info() into the caller-supplied modinfo structure.
 */
2805 2822 int
2806 2823 _info(struct modinfo *modinfop)
2807 2824 {
2808 2825 	return (mod_info(&modlinkage, modinfop));
2809 2826 }
2810 2827
2811 2828 #endif /* !_ELF32_COMPAT */
|
↓ open down ↓ |
478 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX