Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/exec/elf/elf.c
+++ new/usr/src/uts/common/exec/elf/elf.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28 /*
29 29 * Copyright 2016 Joyent, Inc.
30 30 */
31 31
32 32 #include <sys/types.h>
33 33 #include <sys/param.h>
34 34 #include <sys/thread.h>
35 35 #include <sys/sysmacros.h>
36 36 #include <sys/signal.h>
37 37 #include <sys/cred.h>
38 38 #include <sys/user.h>
39 39 #include <sys/errno.h>
40 40 #include <sys/vnode.h>
41 41 #include <sys/mman.h>
42 42 #include <sys/kmem.h>
43 43 #include <sys/proc.h>
44 44 #include <sys/pathname.h>
45 45 #include <sys/cmn_err.h>
46 46 #include <sys/systm.h>
47 47 #include <sys/elf.h>
48 48 #include <sys/vmsystm.h>
49 49 #include <sys/debug.h>
50 50 #include <sys/auxv.h>
51 51 #include <sys/exec.h>
52 52 #include <sys/prsystm.h>
53 53 #include <vm/as.h>
54 54 #include <vm/rm.h>
55 55 #include <vm/seg.h>
56 56 #include <vm/seg_vn.h>
57 57 #include <sys/modctl.h>
58 58 #include <sys/systeminfo.h>
59 59 #include <sys/vmparam.h>
60 60 #include <sys/machelf.h>
61 61 #include <sys/shm_impl.h>
62 62 #include <sys/archsystm.h>
63 63 #include <sys/fasttrap.h>
64 64 #include <sys/brand.h>
65 65 #include "elf_impl.h"
66 66 #include <sys/sdt.h>
67 67 #include <sys/siginfo.h>
68 68
69 69 #if defined(__x86)
70 70 #include <sys/comm_page_util.h>
71 71 #endif /* defined(__x86) */
72 72
73 73
74 74 extern int at_flags;
75 75
76 76 #define ORIGIN_STR "ORIGIN"
77 77 #define ORIGIN_STR_SIZE 6
78 78
79 79 static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
80 80 static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
81 81 ssize_t *);
82 82 static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
83 83 ssize_t *, caddr_t *, ssize_t *);
84 84 static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
85 85 static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
86 86 Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
87 87 caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
88 88
/*
 * Indices of the section-name strings that may be placed in a section
 * header string table (see shstrtab_data[] below); STR_NUM is the count
 * and also sizes shstrtab_t's per-name offset array.
 */
typedef enum {
	STR_CTF,
	STR_SYMTAB,
	STR_DYNSYM,
	STR_STRTAB,
	STR_DYNSTR,
	STR_SHSTRTAB,
	STR_NUM
} shstrtype_t;
98 98
/* Section name strings, indexed by shstrtype_t. */
static const char *shstrtab_data[] = {
	".SUNW_ctf",
	".symtab",
	".dynsym",
	".strtab",
	".dynstr",
	".shstrtab"
};
107 107
/*
 * Accumulator used to lay out a section header string table.
 * sst_ndx[] records the byte offset assigned to each name (0 means the
 * name has not been added); sst_cur is the next free offset.  Offset 0
 * is reserved for the table's leading NUL byte.
 */
typedef struct shstrtab {
	int sst_ndx[STR_NUM];	/* offset of each name; 0 if absent */
	int sst_cur;		/* next unassigned offset / total size */
} shstrtab_t;
112 112
113 113 static void
114 114 shstrtab_init(shstrtab_t *s)
115 115 {
116 116 bzero(&s->sst_ndx, sizeof (s->sst_ndx));
117 117 s->sst_cur = 1;
118 118 }
119 119
120 120 static int
121 121 shstrtab_ndx(shstrtab_t *s, shstrtype_t type)
122 122 {
123 123 int ret;
124 124
125 125 if ((ret = s->sst_ndx[type]) != 0)
126 126 return (ret);
127 127
128 128 ret = s->sst_ndx[type] = s->sst_cur;
129 129 s->sst_cur += strlen(shstrtab_data[type]) + 1;
130 130
131 131 return (ret);
132 132 }
133 133
134 134 static size_t
135 135 shstrtab_size(const shstrtab_t *s)
136 136 {
137 137 return (s->sst_cur);
138 138 }
139 139
140 140 static void
141 141 shstrtab_dump(const shstrtab_t *s, char *buf)
142 142 {
143 143 int i, ndx;
144 144
145 145 *buf = '\0';
146 146 for (i = 0; i < STR_NUM; i++) {
147 147 if ((ndx = s->sst_ndx[i]) != 0)
148 148 (void) strcpy(buf + ndx, shstrtab_data[i]);
149 149 }
150 150 }
151 151
152 152 static int
153 153 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
154 154 {
155 155 ASSERT(phdrp->p_type == PT_SUNWDTRACE);
156 156
157 157 /*
158 158 * See the comment in fasttrap.h for information on how to safely
159 159 * update this program header.
160 160 */
161 161 if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
162 162 (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
163 163 return (-1);
164 164
165 165 args->thrptr = phdrp->p_vaddr + base;
166 166
167 167 return (0);
168 168 }
169 169
170 170 /*
171 171 * Map in the executable pointed to by vp. Returns 0 on success. Note that
172 172 * this function currently has the maximum number of arguments allowed by
173 173 * modstubs on x86 (MAXNARG)! Do _not_ add to this function signature without
174 174 * adding to MAXNARG. (Better yet, do not add to this monster of a function
175 175 * signature!)
176 176 */
177 177 int
178 178 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
179 179 intptr_t *voffset, caddr_t exec_file, char **interpp, caddr_t *bssbase,
180 180 caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap, uintptr_t *minaddrp)
181 181 {
182 182 size_t len;
183 183 struct vattr vat;
184 184 caddr_t phdrbase = NULL;
185 185 ssize_t phdrsize;
186 186 int nshdrs, shstrndx, nphdrs;
187 187 int error = 0;
188 188 Phdr *uphdr = NULL;
189 189 Phdr *junk = NULL;
190 190 Phdr *dynphdr = NULL;
191 191 Phdr *dtrphdr = NULL;
192 192 char *interp = NULL;
193 193 uintptr_t lddata;
194 194 long execsz;
195 195 intptr_t minaddr;
196 196
197 197 if (lddatap != NULL)
198 198 *lddatap = NULL;
199 199
200 200 if (minaddrp != NULL)
201 201 *minaddrp = NULL;
202 202
203 203 if (error = execpermissions(vp, &vat, args)) {
204 204 uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
205 205 return (error);
206 206 }
207 207
208 208 if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
209 209 &nphdrs)) != 0 ||
210 210 (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
211 211 &phdrsize)) != 0) {
212 212 uprintf("%s: Cannot read %s\n", exec_file, args->pathname);
213 213 return (error);
214 214 }
215 215
216 216 if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
217 217 uprintf("%s: Nothing to load in %s", exec_file, args->pathname);
218 218 kmem_free(phdrbase, phdrsize);
219 219 return (ENOEXEC);
220 220 }
221 221 if (lddatap != NULL)
222 222 *lddatap = lddata;
223 223
224 224 if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
225 225 &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
226 226 len, &execsz, brksize)) {
227 227 uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
228 228 if (uphdr != NULL && uphdr->p_flags == 0)
229 229 kmem_free(uphdr, sizeof (Phdr));
230 230 kmem_free(phdrbase, phdrsize);
231 231 return (error);
232 232 }
233 233
234 234 if (minaddrp != NULL)
235 235 *minaddrp = minaddr;
236 236
237 237 /*
238 238 * If the executable requires an interpreter, determine its name.
239 239 */
240 240 if (dynphdr != NULL) {
241 241 ssize_t resid;
242 242
243 243 if (dynphdr->p_filesz > MAXPATHLEN || dynphdr->p_filesz == 0) {
244 244 uprintf("%s: Invalid interpreter\n", exec_file);
245 245 kmem_free(phdrbase, phdrsize);
246 246 return (ENOEXEC);
247 247 }
248 248
249 249 interp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
250 250
251 251 if ((error = vn_rdwr(UIO_READ, vp, interp, dynphdr->p_filesz,
252 252 (offset_t)dynphdr->p_offset, UIO_SYSSPACE, 0,
253 253 (rlim64_t)0, CRED(), &resid)) != 0 || resid != 0 ||
254 254 interp[dynphdr->p_filesz - 1] != '\0') {
255 255 uprintf("%s: Cannot obtain interpreter pathname\n",
256 256 exec_file);
257 257 kmem_free(interp, MAXPATHLEN);
258 258 kmem_free(phdrbase, phdrsize);
259 259 return (error != 0 ? error : ENOEXEC);
260 260 }
261 261 }
262 262
263 263 /*
264 264 * If this is a statically linked executable, voffset should indicate
265 265 * the address of the executable itself (it normally holds the address
266 266 * of the interpreter).
267 267 */
268 268 if (ehdr->e_type == ET_EXEC && interp == NULL)
269 269 *voffset = minaddr;
270 270
271 271 /*
272 272 * If the caller has asked for the interpreter name, return it (it's
273 273 * up to the caller to free it); if the caller hasn't asked for it,
274 274 * free it ourselves.
275 275 */
276 276 if (interpp != NULL) {
277 277 *interpp = interp;
278 278 } else if (interp != NULL) {
279 279 kmem_free(interp, MAXPATHLEN);
280 280 }
281 281
282 282 if (uphdr != NULL) {
283 283 *uphdr_vaddr = uphdr->p_vaddr;
284 284
285 285 if (uphdr->p_flags == 0)
286 286 kmem_free(uphdr, sizeof (Phdr));
287 287 } else if (ehdr->e_type == ET_DYN) {
288 288 /*
289 289 * If we don't have a uphdr, we'll apply the logic found
290 290 * in mapelfexec() and use the p_vaddr of the first PT_LOAD
291 291 * section as the base address of the object.
292 292 */
293 293 Phdr *phdr = (Phdr *)phdrbase;
294 294 int i, hsize = ehdr->e_phentsize;
295 295
296 296 for (i = nphdrs; i > 0; i--) {
297 297 if (phdr->p_type == PT_LOAD) {
298 298 *uphdr_vaddr = (uintptr_t)phdr->p_vaddr +
299 299 ehdr->e_phoff;
300 300 break;
301 301 }
302 302
303 303 phdr = (Phdr *)((caddr_t)phdr + hsize);
304 304 }
305 305
306 306 /*
307 307 * If we don't have a PT_LOAD segment, we should have returned
308 308 * ENOEXEC when elfsize() returned 0, above.
309 309 */
310 310 VERIFY(i > 0);
311 311 } else {
312 312 *uphdr_vaddr = (Addr)-1;
313 313 }
314 314
315 315 kmem_free(phdrbase, phdrsize);
316 316 return (error);
317 317 }
318 318
319 319 /*ARGSUSED*/
320 320 int
321 321 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
322 322 int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
323 323 int *brand_action)
324 324 {
325 325 caddr_t phdrbase = NULL;
326 326 caddr_t bssbase = 0;
327 327 caddr_t brkbase = 0;
328 328 size_t brksize = 0;
329 329 ssize_t dlnsize, nsize = 0;
330 330 aux_entry_t *aux;
331 331 int error;
332 332 ssize_t resid;
333 333 int fd = -1;
334 334 intptr_t voffset;
335 335 Phdr *dyphdr = NULL;
336 336 Phdr *stphdr = NULL;
337 337 Phdr *uphdr = NULL;
338 338 Phdr *junk = NULL;
339 339 size_t len;
340 340 ssize_t phdrsize;
341 341 int postfixsize = 0;
342 342 int i, hsize;
343 343 Phdr *phdrp;
344 344 Phdr *dataphdrp = NULL;
345 345 Phdr *dtrphdr;
346 346 Phdr *capphdr = NULL;
347 347 Cap *cap = NULL;
348 348 ssize_t capsize;
349 349 int hasu = 0;
350 350 int hasauxv = 0;
351 351 int hasdy = 0;
352 352 int branded = 0;
353 353 int dynuphdr = 0;
354 354
355 355 struct proc *p = ttoproc(curthread);
356 356 struct user *up = PTOU(p);
357 357 struct bigwad {
358 358 Ehdr ehdr;
359 359 aux_entry_t elfargs[__KERN_NAUXV_IMPL];
360 360 char dl_name[MAXPATHLEN];
361 361 char pathbuf[MAXPATHLEN];
362 362 struct vattr vattr;
363 363 struct execenv exenv;
364 364 } *bigwad; /* kmem_alloc this behemoth so we don't blow stack */
365 365 Ehdr *ehdrp;
366 366 int nshdrs, shstrndx, nphdrs;
367 367 char *dlnp;
368 368 char *pathbufp;
369 369 rlim64_t limit;
370 370 rlim64_t roundlimit;
371 371
372 372 ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
373 373
374 374 bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
375 375 ehdrp = &bigwad->ehdr;
376 376 dlnp = bigwad->dl_name;
377 377 pathbufp = bigwad->pathbuf;
378 378
379 379 /*
380 380 * Obtain ELF and program header information.
381 381 */
382 382 if ((error = getelfhead(vp, CRED(), ehdrp, &nshdrs, &shstrndx,
383 383 &nphdrs)) != 0 ||
384 384 (error = getelfphdr(vp, CRED(), ehdrp, nphdrs, &phdrbase,
385 385 &phdrsize)) != 0)
386 386 goto out;
387 387
388 388 /*
389 389 * Prevent executing an ELF file that has no entry point.
390 390 */
391 391 if (ehdrp->e_entry == 0) {
392 392 uprintf("%s: Bad entry point\n", exec_file);
393 393 goto bad;
394 394 }
395 395
396 396 /*
397 397 * Put data model that we're exec-ing to into the args passed to
398 398 * exec_args(), so it will know what it is copying to on new stack.
399 399 * Now that we know whether we are exec-ing a 32-bit or 64-bit
400 400 * executable, we can set execsz with the appropriate NCARGS.
401 401 */
402 402 #ifdef _LP64
403 403 if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) {
404 404 args->to_model = DATAMODEL_ILP32;
405 405 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
406 406 } else {
407 407 args->to_model = DATAMODEL_LP64;
408 408 if (!args->stk_prot_override) {
409 409 args->stk_prot &= ~PROT_EXEC;
410 410 }
411 411 #if defined(__i386) || defined(__amd64)
412 412 args->dat_prot &= ~PROT_EXEC;
413 413 #endif
414 414 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1);
415 415 }
416 416 #else /* _LP64 */
417 417 args->to_model = DATAMODEL_ILP32;
418 418 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1);
419 419 #endif /* _LP64 */
420 420
421 421 /*
422 422 * We delay invoking the brand callback until we've figured out what
423 423 * kind of elf binary we're trying to run, 32-bit or 64-bit. We do this
424 424 * because now the brand library can just check args->to_model to see if
425 425 * the target is 32-bit or 64-bit without having do duplicate all the
426 426 * code above.
427 427 *
428 428 * We also give the brand a chance to indicate that based on the ELF
429 429 * OSABI of the target binary it should become unbranded and optionally
430 430 * indicate that it should be treated as existing in a specific prefix.
431 431 *
432 432 * Note that if a brand opts to go down this route it does not actually
433 433 * end up being debranded. In other words, future programs that exec
434 434 * will still be considered for branding unless this escape hatch is
435 435 * used. Consider the case of lx brand for example. If a user runs
436 436 * /native/usr/sbin/dtrace -c /bin/ls, the isaexec and normal executable
437 437 * of DTrace that's in /native will take this escape hatch and be run
438 438 * and interpreted using the normal system call table; however, the
439 439 * execution of a non-illumos binary in the form of /bin/ls will still
440 440 * be branded and be subject to all of the normal actions of the brand.
441 441 *
442 442 * The level checks associated with brand handling below are used to
443 443 * prevent a loop since the brand elfexec function typically comes back
444 444 * through this function. We must check <= here since the nested
445 445 * handling in the #! interpreter code will increment the level before
446 446 * calling gexec to run the final elfexec interpreter.
447 447 */
448 448 if ((level <= INTP_MAXDEPTH) && (*brand_action != EBA_NATIVE) &&
449 449 (PROC_IS_BRANDED(p)) && (BROP(p)->b_native_exec != NULL)) {
450 450 if (BROP(p)->b_native_exec(ehdrp->e_ident[EI_OSABI],
451 451 &args->brand_nroot) == B_TRUE) {
452 452 ASSERT(ehdrp->e_ident[EI_OSABI]);
453 453 *brand_action = EBA_NATIVE;
454 454 /* Add one for the trailing '/' in the path */
455 455 if (args->brand_nroot != NULL)
456 456 nsize = strlen(args->brand_nroot) + 1;
457 457 }
458 458 }
459 459
460 460 if ((level <= INTP_MAXDEPTH) &&
461 461 (*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
462 462 error = BROP(p)->b_elfexec(vp, uap, args,
463 463 idatap, level + 1, execsz, setid, exec_file, cred,
464 464 brand_action);
465 465 goto out;
466 466 }
467 467
468 468 /*
469 469 * Determine aux size now so that stack can be built
470 470 * in one shot (except actual copyout of aux image),
471 471 * determine any non-default stack protections,
472 472 * and still have this code be machine independent.
473 473 */
474 474 hsize = ehdrp->e_phentsize;
475 475 phdrp = (Phdr *)phdrbase;
476 476 for (i = nphdrs; i > 0; i--) {
477 477 switch (phdrp->p_type) {
478 478 case PT_INTERP:
479 479 hasauxv = hasdy = 1;
480 480 break;
481 481 case PT_PHDR:
482 482 hasu = 1;
483 483 break;
484 484 case PT_SUNWSTACK:
485 485 args->stk_prot = PROT_USER;
486 486 if (phdrp->p_flags & PF_R)
487 487 args->stk_prot |= PROT_READ;
488 488 if (phdrp->p_flags & PF_W)
489 489 args->stk_prot |= PROT_WRITE;
490 490 if (phdrp->p_flags & PF_X)
491 491 args->stk_prot |= PROT_EXEC;
492 492 break;
493 493 case PT_LOAD:
494 494 dataphdrp = phdrp;
495 495 break;
496 496 case PT_SUNWCAP:
497 497 capphdr = phdrp;
498 498 break;
499 499 }
500 500 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
501 501 }
502 502
503 503 if (ehdrp->e_type != ET_EXEC) {
504 504 dataphdrp = NULL;
505 505 hasauxv = 1;
506 506 }
507 507
508 508 /* Copy BSS permissions to args->dat_prot */
509 509 if (dataphdrp != NULL) {
510 510 args->dat_prot = PROT_USER;
511 511 if (dataphdrp->p_flags & PF_R)
512 512 args->dat_prot |= PROT_READ;
513 513 if (dataphdrp->p_flags & PF_W)
514 514 args->dat_prot |= PROT_WRITE;
515 515 if (dataphdrp->p_flags & PF_X)
516 516 args->dat_prot |= PROT_EXEC;
517 517 }
518 518
519 519 /*
520 520 * If a auxvector will be required - reserve the space for
521 521 * it now. This may be increased by exec_args if there are
|
↓ open down ↓ |
521 lines elided |
↑ open up ↑ |
522 522 * ISA-specific types (included in __KERN_NAUXV_IMPL).
523 523 */
524 524 if (hasauxv) {
525 525 /*
526 526 * If a AUX vector is being built - the base AUX
527 527 * entries are:
528 528 *
529 529 * AT_BASE
530 530 * AT_FLAGS
531 531 * AT_PAGESZ
532 - * AT_RANDOM (added in stk_copyout)
532 + * AT_RANDOM
533 533 * AT_SUN_AUXFLAGS
534 534 * AT_SUN_HWCAP
535 535 * AT_SUN_HWCAP2
536 - * AT_SUN_PLATFORM (added in stk_copyout)
537 - * AT_SUN_EXECNAME (added in stk_copyout)
536 + * AT_SUN_PLATFORM (added in stk_copyout)
537 + * AT_SUN_EXECNAME (added in stk_copyout)
538 538 * AT_NULL
539 539 *
540 540 * total == 10
541 541 */
542 542 if (hasdy && hasu) {
543 543 /*
544 544 * Has PT_INTERP & PT_PHDR - the auxvectors that
545 545 * will be built are:
546 546 *
547 547 * AT_PHDR
548 548 * AT_PHENT
549 549 * AT_PHNUM
550 550 * AT_ENTRY
551 551 * AT_LDDATA
552 552 *
553 553 * total = 5
554 554 */
555 555 args->auxsize = (10 + 5) * sizeof (aux_entry_t);
556 556 } else if (hasdy) {
557 557 /*
558 558 * Has PT_INTERP but no PT_PHDR
559 559 *
560 560 * AT_EXECFD
561 561 * AT_LDDATA
562 562 *
563 563 * total = 2
564 564 */
565 565 args->auxsize = (10 + 2) * sizeof (aux_entry_t);
566 566 } else {
567 567 args->auxsize = 10 * sizeof (aux_entry_t);
568 568 }
569 569 } else {
570 570 args->auxsize = 0;
571 571 }
572 572
573 573 /*
574 574 * If this binary is using an emulator, we need to add an
575 575 * AT_SUN_EMULATOR aux entry.
576 576 */
577 577 if (args->emulator != NULL)
578 578 args->auxsize += sizeof (aux_entry_t);
579 579
580 580 /*
581 581 * If this is a native binary that's been given a modified interpreter
582 582 * root, inform it that the native system exists at that root.
583 583 */
584 584 if (args->brand_nroot != NULL) {
585 585 args->auxsize += sizeof (aux_entry_t);
586 586 }
587 587
588 588
589 589 /*
590 590 * On supported kernels (x86_64) make room in the auxv for the
591 591 * AT_SUN_COMMPAGE entry. This will go unpopulated on i86xpv systems
592 592 * which do not provide such functionality.
593 593 */
594 594 #if defined(__amd64)
595 595 args->auxsize += sizeof (aux_entry_t);
596 596 #endif /* defined(__amd64) */
597 597
598 598 /*
599 599 * If we have user credentials, we'll supply the following entries:
600 600 * AT_SUN_UID
601 601 * AT_SUN_RUID
602 602 * AT_SUN_GID
603 603 * AT_SUN_RGID
604 604 */
605 605 if (cred != NULL) {
606 606 args->auxsize += 4 * sizeof (aux_entry_t);
607 607 }
608 608
609 609 if ((*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
610 610 branded = 1;
611 611 /*
612 612 * We will be adding 5 entries to the aux vectors. One for
613 613 * the the brandname and 4 for the brand specific aux vectors.
614 614 */
615 615 args->auxsize += 5 * sizeof (aux_entry_t);
616 616 }
617 617
618 618 /* Hardware/Software capabilities */
619 619 if (capphdr != NULL &&
620 620 (capsize = capphdr->p_filesz) > 0 &&
621 621 capsize <= 16 * sizeof (*cap)) {
622 622 int ncaps = capsize / sizeof (*cap);
623 623 Cap *cp;
624 624
625 625 cap = kmem_alloc(capsize, KM_SLEEP);
626 626 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
627 627 capsize, (offset_t)capphdr->p_offset,
628 628 UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
629 629 uprintf("%s: Cannot read capabilities section\n",
630 630 exec_file);
631 631 goto out;
632 632 }
633 633 for (cp = cap; cp < cap + ncaps; cp++) {
634 634 if (cp->c_tag == CA_SUNW_SF_1 &&
635 635 (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
|
↓ open down ↓ |
88 lines elided |
↑ open up ↑ |
636 636 if (args->to_model == DATAMODEL_LP64)
637 637 args->addr32 = 1;
638 638 break;
639 639 }
640 640 }
641 641 }
642 642
643 643 aux = bigwad->elfargs;
644 644 /*
645 645 * Move args to the user's stack.
646 - * This can fill in the AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM
647 - * aux entries.
646 + * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
648 647 */
649 648 if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
650 649 if (error == -1) {
651 650 error = ENOEXEC;
652 651 goto bad;
653 652 }
654 653 goto out;
655 654 }
656 655 /* we're single threaded after this point */
657 656
658 657 /*
659 658 * If this is an ET_DYN executable (shared object),
660 659 * determine its memory size so that mapelfexec() can load it.
661 660 */
662 661 if (ehdrp->e_type == ET_DYN)
663 662 len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
664 663 else
665 664 len = 0;
666 665
667 666 dtrphdr = NULL;
668 667
669 668 if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr,
670 669 &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
671 670 len, execsz, &brksize)) != 0)
672 671 goto bad;
673 672
674 673 if (uphdr != NULL) {
675 674 /*
676 675 * Our uphdr has been dynamically allocated if (and only if)
677 676 * its program header flags are clear.
678 677 */
679 678 dynuphdr = (uphdr->p_flags == 0);
680 679 }
681 680
682 681 if (uphdr != NULL && dyphdr == NULL)
683 682 goto bad;
684 683
685 684 if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
686 685 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
687 686 goto bad;
688 687 }
689 688
690 689 if (dyphdr != NULL) {
691 690 size_t len;
692 691 uintptr_t lddata;
693 692 char *p;
694 693 struct vnode *nvp;
695 694
696 695 dlnsize = dyphdr->p_filesz + nsize;
697 696
698 697 if (dlnsize > MAXPATHLEN || dlnsize <= 0)
699 698 goto bad;
700 699
701 700 if (nsize != 0) {
702 701 bcopy(args->brand_nroot, dlnp, nsize - 1);
703 702 dlnp[nsize - 1] = '/';
704 703 }
705 704
706 705 /*
707 706 * Read in "interpreter" pathname.
708 707 */
709 708 if ((error = vn_rdwr(UIO_READ, vp, dlnp + nsize,
710 709 dyphdr->p_filesz, (offset_t)dyphdr->p_offset, UIO_SYSSPACE,
711 710 0, (rlim64_t)0, CRED(), &resid)) != 0) {
712 711 uprintf("%s: Cannot obtain interpreter pathname\n",
713 712 exec_file);
714 713 goto bad;
715 714 }
716 715
717 716 if (resid != 0 || dlnp[dlnsize - 1] != '\0')
718 717 goto bad;
719 718
720 719 /*
721 720 * Search for '$ORIGIN' token in interpreter path.
722 721 * If found, expand it.
723 722 */
724 723 for (p = dlnp; p = strchr(p, '$'); ) {
725 724 uint_t len, curlen;
726 725 char *_ptr;
727 726
728 727 if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
729 728 continue;
730 729
731 730 /*
732 731 * We don't support $ORIGIN on setid programs to close
733 732 * a potential attack vector.
734 733 */
735 734 if ((setid & EXECSETID_SETID) != 0) {
736 735 error = ENOEXEC;
737 736 goto bad;
738 737 }
739 738
740 739 curlen = 0;
741 740 len = p - dlnp - 1;
742 741 if (len) {
743 742 bcopy(dlnp, pathbufp, len);
744 743 curlen += len;
745 744 }
746 745 if (_ptr = strrchr(args->pathname, '/')) {
747 746 len = _ptr - args->pathname;
748 747 if ((curlen + len) > MAXPATHLEN)
749 748 break;
750 749
751 750 bcopy(args->pathname, &pathbufp[curlen], len);
752 751 curlen += len;
753 752 } else {
754 753 /*
755 754 * executable is a basename found in the
756 755 * current directory. So - just substitue
757 756 * '.' for ORIGIN.
758 757 */
759 758 pathbufp[curlen] = '.';
760 759 curlen++;
761 760 }
762 761 p += ORIGIN_STR_SIZE;
763 762 len = strlen(p);
764 763
765 764 if ((curlen + len) > MAXPATHLEN)
766 765 break;
767 766 bcopy(p, &pathbufp[curlen], len);
768 767 curlen += len;
769 768 pathbufp[curlen++] = '\0';
770 769 bcopy(pathbufp, dlnp, curlen);
771 770 }
772 771
773 772 /*
774 773 * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
775 774 * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
776 775 * Just in case /usr is not mounted, change it now.
777 776 */
778 777 if (strcmp(dlnp, USR_LIB_RTLD) == 0)
779 778 dlnp += 4;
780 779 error = lookupname(dlnp, UIO_SYSSPACE, FOLLOW, NULLVPP, &nvp);
781 780 if (error && dlnp != bigwad->dl_name) {
782 781 /* new kernel, old user-level */
783 782 error = lookupname(dlnp -= 4, UIO_SYSSPACE, FOLLOW,
784 783 NULLVPP, &nvp);
785 784 }
786 785 if (error) {
787 786 uprintf("%s: Cannot find %s\n", exec_file, dlnp);
788 787 goto bad;
789 788 }
790 789
791 790 /*
792 791 * Setup the "aux" vector.
793 792 */
794 793 if (uphdr) {
795 794 if (ehdrp->e_type == ET_DYN) {
796 795 /* don't use the first page */
797 796 bigwad->exenv.ex_brkbase = (caddr_t)PAGESIZE;
798 797 bigwad->exenv.ex_bssbase = (caddr_t)PAGESIZE;
799 798 } else {
800 799 bigwad->exenv.ex_bssbase = bssbase;
801 800 bigwad->exenv.ex_brkbase = brkbase;
802 801 }
803 802 bigwad->exenv.ex_brksize = brksize;
804 803 bigwad->exenv.ex_magic = elfmagic;
805 804 bigwad->exenv.ex_vp = vp;
806 805 setexecenv(&bigwad->exenv);
807 806
808 807 ADDAUX(aux, AT_PHDR, uphdr->p_vaddr + voffset)
809 808 ADDAUX(aux, AT_PHENT, ehdrp->e_phentsize)
810 809 ADDAUX(aux, AT_PHNUM, nphdrs)
811 810 ADDAUX(aux, AT_ENTRY, ehdrp->e_entry + voffset)
812 811 } else {
813 812 if ((error = execopen(&vp, &fd)) != 0) {
814 813 VN_RELE(nvp);
815 814 goto bad;
816 815 }
817 816
818 817 ADDAUX(aux, AT_EXECFD, fd)
819 818 }
820 819
821 820 if ((error = execpermissions(nvp, &bigwad->vattr, args)) != 0) {
822 821 VN_RELE(nvp);
823 822 uprintf("%s: Cannot execute %s\n", exec_file, dlnp);
824 823 goto bad;
825 824 }
826 825
827 826 /*
828 827 * Now obtain the ELF header along with the entire program
829 828 * header contained in "nvp".
830 829 */
831 830 kmem_free(phdrbase, phdrsize);
832 831 phdrbase = NULL;
833 832 if ((error = getelfhead(nvp, CRED(), ehdrp, &nshdrs,
834 833 &shstrndx, &nphdrs)) != 0 ||
835 834 (error = getelfphdr(nvp, CRED(), ehdrp, nphdrs, &phdrbase,
836 835 &phdrsize)) != 0) {
837 836 VN_RELE(nvp);
838 837 uprintf("%s: Cannot read %s\n", exec_file, dlnp);
839 838 goto bad;
840 839 }
841 840
842 841 /*
843 842 * Determine memory size of the "interpreter's" loadable
844 843 * sections. This size is then used to obtain the virtual
845 844 * address of a hole, in the user's address space, large
846 845 * enough to map the "interpreter".
847 846 */
848 847 if ((len = elfsize(ehdrp, nphdrs, phdrbase, &lddata)) == 0) {
849 848 VN_RELE(nvp);
850 849 uprintf("%s: Nothing to load in %s\n", exec_file, dlnp);
851 850 goto bad;
852 851 }
853 852
854 853 dtrphdr = NULL;
855 854
856 855 error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, NULL, &junk,
857 856 &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
858 857 execsz, NULL);
859 858
860 859 if (error || junk != NULL) {
861 860 VN_RELE(nvp);
862 861 uprintf("%s: Cannot map %s\n", exec_file, dlnp);
863 862 goto bad;
864 863 }
865 864
866 865 /*
867 866 * We use the DTrace program header to initialize the
868 867 * architecture-specific user per-LWP location. The dtrace
869 868 * fasttrap provider requires ready access to per-LWP scratch
870 869 * space. We assume that there is only one such program header
871 870 * in the interpreter.
872 871 */
873 872 if (dtrphdr != NULL &&
874 873 dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
875 874 VN_RELE(nvp);
876 875 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, dlnp);
877 876 goto bad;
|
↓ open down ↓ |
220 lines elided |
↑ open up ↑ |
878 877 }
879 878
880 879 VN_RELE(nvp);
881 880 ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata)
882 881 }
883 882
884 883 if (hasauxv) {
885 884 int auxf = AF_SUN_HWCAPVERIFY;
886 885
887 886 /*
888 - * Note: AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM were
889 - * filled in via exec_args()
887 + * Note: AT_SUN_PLATFORM and AT_RANDOM were filled in via
888 + * exec_args()
890 889 */
891 890 ADDAUX(aux, AT_BASE, voffset)
892 891 ADDAUX(aux, AT_FLAGS, at_flags)
893 892 ADDAUX(aux, AT_PAGESZ, PAGESIZE)
894 893 /*
895 894 * Linker flags. (security)
896 895 * p_flag not yet set at this time.
897 896 * We rely on gexec() to provide us with the information.
898 897 * If the application is set-uid but this is not reflected
899 898 * in a mismatch between real/effective uids/gids, then
900 899 * don't treat this as a set-uid exec. So we care about
901 900 * the EXECSETID_UGIDS flag but not the ...SETID flag.
902 901 */
903 902 if ((setid &= ~EXECSETID_SETID) != 0)
904 903 auxf |= AF_SUN_SETUGID;
905 904
906 905 /*
907 906 * If we're running a native process from within a branded
908 907 * zone under pfexec then we clear the AF_SUN_SETUGID flag so
909 908 * that the native ld.so.1 is able to link with the native
910 909 * libraries instead of using the brand libraries that are
911 910 * installed in the zone. We only do this for processes
912 911 * which we trust because we see they are already running
913 912 * under pfexec (where uid != euid). This prevents a
914 913 * malicious user within the zone from crafting a wrapper to
915 914 * run native suid commands with unsecure libraries interposed.
916 915 */
917 916 if ((*brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
918 917 (setid &= ~EXECSETID_SETID) != 0))
919 918 auxf &= ~AF_SUN_SETUGID;
920 919
921 920 /*
922 921 * Record the user addr of the auxflags aux vector entry
923 922 * since brands may optionally want to manipulate this field.
924 923 */
925 924 args->auxp_auxflags =
926 925 (char *)((char *)args->stackend +
927 926 ((char *)&aux->a_type -
928 927 (char *)bigwad->elfargs));
929 928 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
930 929
931 930 /*
932 931 * Record information about the real and effective user and
933 932 * group IDs.
934 933 */
935 934 if (cred != NULL) {
936 935 ADDAUX(aux, AT_SUN_UID, crgetuid(cred));
937 936 ADDAUX(aux, AT_SUN_RUID, crgetruid(cred));
938 937 ADDAUX(aux, AT_SUN_GID, crgetgid(cred));
939 938 ADDAUX(aux, AT_SUN_RGID, crgetrgid(cred));
940 939 }
941 940
942 941 /*
943 942 * Hardware capability flag word (performance hints)
944 943 * Used for choosing faster library routines.
945 944 * (Potentially different between 32-bit and 64-bit ABIs)
946 945 */
947 946 #if defined(_LP64)
948 947 if (args->to_model == DATAMODEL_NATIVE) {
949 948 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
950 949 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
951 950 } else {
952 951 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
953 952 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
954 953 }
955 954 #else
956 955 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
957 956 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
958 957 #endif
959 958 if (branded) {
960 959 /*
961 960 * Reserve space for the brand-private aux vectors,
962 961 * and record the user addr of that space.
963 962 */
964 963 args->auxp_brand =
965 964 (char *)((char *)args->stackend +
966 965 ((char *)&aux->a_type -
967 966 (char *)bigwad->elfargs));
968 967 ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
969 968 ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
970 969 ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
971 970 ADDAUX(aux, AT_SUN_BRAND_AUX4, 0)
972 971 }
973 972
974 973 /*
975 974 * Add the comm page auxv entry, mapping it in if needed.
976 975 */
977 976 #if defined(__amd64)
978 977 if (args->commpage != NULL ||
979 978 (args->commpage = (uintptr_t)comm_page_mapin()) != NULL) {
980 979 ADDAUX(aux, AT_SUN_COMMPAGE, args->commpage)
981 980 } else {
982 981 /*
983 982 * If the comm page cannot be mapped, pad out the auxv
984 983 * to satisfy later size checks.
985 984 */
986 985 ADDAUX(aux, AT_NULL, 0)
987 986 }
988 987 #endif /* defined(__amd64) */
989 988
990 989 ADDAUX(aux, AT_NULL, 0)
991 990 postfixsize = (char *)aux - (char *)bigwad->elfargs;
992 991
993 992 /*
994 993 * We make assumptions above when we determine how many aux
995 994 * vector entries we will be adding. However, if we have an
996 995 * invalid elf file, it is possible that mapelfexec might
997 996 * behave differently (but not return an error), in which case
998 997 * the number of aux entries we actually add will be different.
999 998 * We detect that now and error out.
1000 999 */
1001 1000 if (postfixsize != args->auxsize) {
1002 1001 DTRACE_PROBE2(elfexec_badaux, int, postfixsize,
1003 1002 int, args->auxsize);
1004 1003 goto bad;
1005 1004 }
1006 1005 ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
1007 1006 }
1008 1007
1009 1008 /*
1010 1009 * For the 64-bit kernel, the limit is big enough that rounding it up
1011 1010 * to a page can overflow the 64-bit limit, so we check for btopr()
1012 1011 * overflowing here by comparing it with the unrounded limit in pages.
1013 1012 * If it hasn't overflowed, compare the exec size with the rounded up
1014 1013 * limit in pages. Otherwise, just compare with the unrounded limit.
1015 1014 */
1016 1015 limit = btop(p->p_vmem_ctl);
1017 1016 roundlimit = btopr(p->p_vmem_ctl);
1018 1017 if ((roundlimit > limit && *execsz > roundlimit) ||
1019 1018 (roundlimit < limit && *execsz > limit)) {
1020 1019 mutex_enter(&p->p_lock);
1021 1020 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
1022 1021 RCA_SAFE);
1023 1022 mutex_exit(&p->p_lock);
1024 1023 error = ENOMEM;
1025 1024 goto bad;
1026 1025 }
1027 1026
1028 1027 bzero(up->u_auxv, sizeof (up->u_auxv));
1029 1028 up->u_commpagep = args->commpage;
1030 1029 if (postfixsize) {
1031 1030 int num_auxv;
1032 1031
1033 1032 /*
1034 1033 * Copy the aux vector to the user stack.
1035 1034 */
1036 1035 error = execpoststack(args, bigwad->elfargs, postfixsize);
1037 1036 if (error)
1038 1037 goto bad;
1039 1038
1040 1039 /*
1041 1040 * Copy auxv to the process's user structure for use by /proc.
1042 1041 * If this is a branded process, the brand's exec routine will
		 * copy its private entries to the user structure later. It
1044 1043 * relies on the fact that the blank entries are at the end.
1045 1044 */
1046 1045 num_auxv = postfixsize / sizeof (aux_entry_t);
1047 1046 ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
1048 1047 aux = bigwad->elfargs;
1049 1048 for (i = 0; i < num_auxv; i++) {
1050 1049 up->u_auxv[i].a_type = aux[i].a_type;
1051 1050 up->u_auxv[i].a_un.a_val = (aux_val_t)aux[i].a_un.a_val;
1052 1051 }
1053 1052 }
1054 1053
1055 1054 /*
1056 1055 * Pass back the starting address so we can set the program counter.
1057 1056 */
1058 1057 args->entry = (uintptr_t)(ehdrp->e_entry + voffset);
1059 1058
1060 1059 if (!uphdr) {
1061 1060 if (ehdrp->e_type == ET_DYN) {
1062 1061 /*
1063 1062 * If we are executing a shared library which doesn't
			 * have an interpreter (probably ld.so.1) then
1065 1064 * we don't set the brkbase now. Instead we
			 * delay its setting until the first call
1067 1066 * via grow.c::brk(). This permits ld.so.1 to
1068 1067 * initialize brkbase to the tail of the executable it
1069 1068 * loads (which is where it needs to be).
1070 1069 */
1071 1070 bigwad->exenv.ex_brkbase = (caddr_t)0;
1072 1071 bigwad->exenv.ex_bssbase = (caddr_t)0;
1073 1072 bigwad->exenv.ex_brksize = 0;
1074 1073 } else {
1075 1074 bigwad->exenv.ex_brkbase = brkbase;
1076 1075 bigwad->exenv.ex_bssbase = bssbase;
1077 1076 bigwad->exenv.ex_brksize = brksize;
1078 1077 }
1079 1078 bigwad->exenv.ex_magic = elfmagic;
1080 1079 bigwad->exenv.ex_vp = vp;
1081 1080 setexecenv(&bigwad->exenv);
1082 1081 }
1083 1082
1084 1083 ASSERT(error == 0);
1085 1084 goto out;
1086 1085
1087 1086 bad:
1088 1087 if (fd != -1) /* did we open the a.out yet */
1089 1088 (void) execclose(fd);
1090 1089
1091 1090 psignal(p, SIGKILL);
1092 1091
1093 1092 if (error == 0)
1094 1093 error = ENOEXEC;
1095 1094 out:
1096 1095 if (dynuphdr)
1097 1096 kmem_free(uphdr, sizeof (Phdr));
1098 1097 if (phdrbase != NULL)
1099 1098 kmem_free(phdrbase, phdrsize);
1100 1099 if (cap != NULL)
1101 1100 kmem_free(cap, capsize);
1102 1101 kmem_free(bigwad, sizeof (struct bigwad));
1103 1102 return (error);
1104 1103 }
1105 1104
1106 1105 /*
1107 1106 * Compute the memory size requirement for the ELF file.
1108 1107 */
1109 1108 static size_t
1110 1109 elfsize(Ehdr *ehdrp, int nphdrs, caddr_t phdrbase, uintptr_t *lddata)
1111 1110 {
1112 1111 size_t len;
1113 1112 Phdr *phdrp = (Phdr *)phdrbase;
1114 1113 int hsize = ehdrp->e_phentsize;
1115 1114 int first = 1;
1116 1115 int dfirst = 1; /* first data segment */
1117 1116 uintptr_t loaddr = 0;
1118 1117 uintptr_t hiaddr = 0;
1119 1118 uintptr_t lo, hi;
1120 1119 int i;
1121 1120
1122 1121 for (i = nphdrs; i > 0; i--) {
1123 1122 if (phdrp->p_type == PT_LOAD) {
1124 1123 lo = phdrp->p_vaddr;
1125 1124 hi = lo + phdrp->p_memsz;
1126 1125 if (first) {
1127 1126 loaddr = lo;
1128 1127 hiaddr = hi;
1129 1128 first = 0;
1130 1129 } else {
1131 1130 if (loaddr > lo)
1132 1131 loaddr = lo;
1133 1132 if (hiaddr < hi)
1134 1133 hiaddr = hi;
1135 1134 }
1136 1135
1137 1136 /*
1138 1137 * save the address of the first data segment
1139 1138 * of a object - used for the AT_SUNW_LDDATA
1140 1139 * aux entry.
1141 1140 */
1142 1141 if ((lddata != NULL) && dfirst &&
1143 1142 (phdrp->p_flags & PF_W)) {
1144 1143 *lddata = lo;
1145 1144 dfirst = 0;
1146 1145 }
1147 1146 }
1148 1147 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
1149 1148 }
1150 1149
1151 1150 len = hiaddr - (loaddr & PAGEMASK);
1152 1151 len = roundup(len, PAGESIZE);
1153 1152
1154 1153 return (len);
1155 1154 }
1156 1155
1157 1156 /*
1158 1157 * Read in the ELF header and program header table.
1159 1158 * SUSV3 requires:
1160 1159 * ENOEXEC File format is not recognized
1161 1160 * EINVAL Format recognized but execution not supported
1162 1161 */
1163 1162 static int
1164 1163 getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, int *nshdrs, int *shstrndx,
1165 1164 int *nphdrs)
1166 1165 {
1167 1166 int error;
1168 1167 ssize_t resid;
1169 1168
1170 1169 /*
1171 1170 * We got here by the first two bytes in ident,
1172 1171 * now read the entire ELF header.
1173 1172 */
1174 1173 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr,
1175 1174 sizeof (Ehdr), (offset_t)0, UIO_SYSSPACE, 0,
1176 1175 (rlim64_t)0, credp, &resid)) != 0)
1177 1176 return (error);
1178 1177
1179 1178 /*
1180 1179 * Since a separate version is compiled for handling 32-bit and
1181 1180 * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
1182 1181 * doesn't need to be able to deal with 32-bit ELF files.
1183 1182 */
1184 1183 if (resid != 0 ||
1185 1184 ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
1186 1185 ehdr->e_ident[EI_MAG3] != ELFMAG3)
1187 1186 return (ENOEXEC);
1188 1187
1189 1188 if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ||
1190 1189 #if defined(_ILP32) || defined(_ELF32_COMPAT)
1191 1190 ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
1192 1191 #else
1193 1192 ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
1194 1193 #endif
1195 1194 !elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine,
1196 1195 ehdr->e_flags))
1197 1196 return (EINVAL);
1198 1197
1199 1198 *nshdrs = ehdr->e_shnum;
1200 1199 *shstrndx = ehdr->e_shstrndx;
1201 1200 *nphdrs = ehdr->e_phnum;
1202 1201
1203 1202 /*
1204 1203 * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
1205 1204 * to read in the section header at index zero to acces the true
1206 1205 * values for those fields.
1207 1206 */
1208 1207 if ((*nshdrs == 0 && ehdr->e_shoff != 0) ||
1209 1208 *shstrndx == SHN_XINDEX || *nphdrs == PN_XNUM) {
1210 1209 Shdr shdr;
1211 1210
1212 1211 if (ehdr->e_shoff == 0)
1213 1212 return (EINVAL);
1214 1213
1215 1214 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr,
1216 1215 sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0,
1217 1216 (rlim64_t)0, credp, &resid)) != 0)
1218 1217 return (error);
1219 1218
1220 1219 if (*nshdrs == 0)
1221 1220 *nshdrs = shdr.sh_size;
1222 1221 if (*shstrndx == SHN_XINDEX)
1223 1222 *shstrndx = shdr.sh_link;
1224 1223 if (*nphdrs == PN_XNUM && shdr.sh_info != 0)
1225 1224 *nphdrs = shdr.sh_info;
1226 1225 }
1227 1226
1228 1227 return (0);
1229 1228 }
1230 1229
1231 1230 #ifdef _ELF32_COMPAT
1232 1231 extern size_t elf_nphdr_max;
1233 1232 #else
1234 1233 size_t elf_nphdr_max = 1000;
1235 1234 #endif
1236 1235
1237 1236 static int
1238 1237 getelfphdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, int nphdrs,
1239 1238 caddr_t *phbasep, ssize_t *phsizep)
1240 1239 {
1241 1240 ssize_t resid, minsize;
1242 1241 int err;
1243 1242
1244 1243 /*
1245 1244 * Since we're going to be using e_phentsize to iterate down the
1246 1245 * array of program headers, it must be 8-byte aligned or else
1247 1246 * a we might cause a misaligned access. We use all members through
1248 1247 * p_flags on 32-bit ELF files and p_memsz on 64-bit ELF files so
1249 1248 * e_phentsize must be at least large enough to include those
1250 1249 * members.
1251 1250 */
1252 1251 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1253 1252 minsize = offsetof(Phdr, p_flags) + sizeof (((Phdr *)NULL)->p_flags);
1254 1253 #else
1255 1254 minsize = offsetof(Phdr, p_memsz) + sizeof (((Phdr *)NULL)->p_memsz);
1256 1255 #endif
1257 1256 if (ehdr->e_phentsize < minsize || (ehdr->e_phentsize & 3))
1258 1257 return (EINVAL);
1259 1258
1260 1259 *phsizep = nphdrs * ehdr->e_phentsize;
1261 1260
1262 1261 if (*phsizep > sizeof (Phdr) * elf_nphdr_max) {
1263 1262 if ((*phbasep = kmem_alloc(*phsizep, KM_NOSLEEP)) == NULL)
1264 1263 return (ENOMEM);
1265 1264 } else {
1266 1265 *phbasep = kmem_alloc(*phsizep, KM_SLEEP);
1267 1266 }
1268 1267
1269 1268 if ((err = vn_rdwr(UIO_READ, vp, *phbasep, *phsizep,
1270 1269 (offset_t)ehdr->e_phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1271 1270 credp, &resid)) != 0) {
1272 1271 kmem_free(*phbasep, *phsizep);
1273 1272 *phbasep = NULL;
1274 1273 return (err);
1275 1274 }
1276 1275
1277 1276 return (0);
1278 1277 }
1279 1278
1280 1279 #ifdef _ELF32_COMPAT
1281 1280 extern size_t elf_nshdr_max;
1282 1281 extern size_t elf_shstrtab_max;
1283 1282 #else
1284 1283 size_t elf_nshdr_max = 10000;
1285 1284 size_t elf_shstrtab_max = 100 * 1024;
1286 1285 #endif
1287 1286
1288 1287
1289 1288 static int
1290 1289 getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr,
1291 1290 int nshdrs, int shstrndx, caddr_t *shbasep, ssize_t *shsizep,
1292 1291 char **shstrbasep, ssize_t *shstrsizep)
1293 1292 {
1294 1293 ssize_t resid, minsize;
1295 1294 int err;
1296 1295 Shdr *shdr;
1297 1296
1298 1297 /*
1299 1298 * Since we're going to be using e_shentsize to iterate down the
1300 1299 * array of section headers, it must be 8-byte aligned or else
1301 1300 * a we might cause a misaligned access. We use all members through
1302 1301 * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
1303 1302 * must be at least large enough to include that member. The index
1304 1303 * of the string table section must also be valid.
1305 1304 */
1306 1305 minsize = offsetof(Shdr, sh_entsize) + sizeof (shdr->sh_entsize);
1307 1306 if (ehdr->e_shentsize < minsize || (ehdr->e_shentsize & 3) ||
1308 1307 shstrndx >= nshdrs)
1309 1308 return (EINVAL);
1310 1309
1311 1310 *shsizep = nshdrs * ehdr->e_shentsize;
1312 1311
1313 1312 if (*shsizep > sizeof (Shdr) * elf_nshdr_max) {
1314 1313 if ((*shbasep = kmem_alloc(*shsizep, KM_NOSLEEP)) == NULL)
1315 1314 return (ENOMEM);
1316 1315 } else {
1317 1316 *shbasep = kmem_alloc(*shsizep, KM_SLEEP);
1318 1317 }
1319 1318
1320 1319 if ((err = vn_rdwr(UIO_READ, vp, *shbasep, *shsizep,
1321 1320 (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1322 1321 credp, &resid)) != 0) {
1323 1322 kmem_free(*shbasep, *shsizep);
1324 1323 return (err);
1325 1324 }
1326 1325
1327 1326 /*
1328 1327 * Pull the section string table out of the vnode; fail if the size
1329 1328 * is zero.
1330 1329 */
1331 1330 shdr = (Shdr *)(*shbasep + shstrndx * ehdr->e_shentsize);
1332 1331 if ((*shstrsizep = shdr->sh_size) == 0) {
1333 1332 kmem_free(*shbasep, *shsizep);
1334 1333 return (EINVAL);
1335 1334 }
1336 1335
1337 1336 if (*shstrsizep > elf_shstrtab_max) {
1338 1337 if ((*shstrbasep = kmem_alloc(*shstrsizep,
1339 1338 KM_NOSLEEP)) == NULL) {
1340 1339 kmem_free(*shbasep, *shsizep);
1341 1340 return (ENOMEM);
1342 1341 }
1343 1342 } else {
1344 1343 *shstrbasep = kmem_alloc(*shstrsizep, KM_SLEEP);
1345 1344 }
1346 1345
1347 1346 if ((err = vn_rdwr(UIO_READ, vp, *shstrbasep, *shstrsizep,
1348 1347 (offset_t)shdr->sh_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
1349 1348 credp, &resid)) != 0) {
1350 1349 kmem_free(*shbasep, *shsizep);
1351 1350 kmem_free(*shstrbasep, *shstrsizep);
1352 1351 return (err);
1353 1352 }
1354 1353
1355 1354 /*
1356 1355 * Make sure the strtab is null-terminated to make sure we
1357 1356 * don't run off the end of the table.
1358 1357 */
1359 1358 (*shstrbasep)[*shstrsizep - 1] = '\0';
1360 1359
1361 1360 return (0);
1362 1361 }
1363 1362
1364 1363
1365 1364 #ifdef _ELF32_COMPAT
1366 1365 int
1367 1366 elf32readhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
1368 1367 caddr_t *phbasep, ssize_t *phsizep)
1369 1368 #else
1370 1369 int
1371 1370 elfreadhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
1372 1371 caddr_t *phbasep, ssize_t *phsizep)
1373 1372 #endif
1374 1373 {
1375 1374 int error, nshdrs, shstrndx;
1376 1375
1377 1376 if ((error = getelfhead(vp, credp, ehdrp, &nshdrs, &shstrndx,
1378 1377 nphdrs)) != 0 ||
1379 1378 (error = getelfphdr(vp, credp, ehdrp, *nphdrs, phbasep,
1380 1379 phsizep)) != 0) {
1381 1380 return (error);
1382 1381 }
1383 1382 return (0);
1384 1383 }
1385 1384
1386 1385
/*
 * Map the PT_LOAD segments of an ELF object into the current process's
 * address space and record the interesting program headers for the caller:
 * *dyphdr (PT_INTERP), *stphdr (PT_SHLIB), *uphdr (PT_PHDR, possibly
 * synthesized -- see below) and *dtphdr (PT_SUNWDTRACE).
 *
 * On success (returns 0):
 *	*voffset  - relocation bias applied to an ET_DYN object (0 for
 *		    ET_EXEC);
 *	*minaddr  - lowest address mapped (if minaddr != NULL);
 *	*bssbase/*brkbase/*brksize - heap placement details derived from
 *		    the data segment (each only if its pointer is non-NULL);
 *	*execsz   - incremented by the page count of every PT_LOAD segment.
 * Otherwise returns ENOMEM, an execmap() error, or EINVAL for malformed
 * headers.
 */
static int
mapelfexec(
	vnode_t *vp,
	Ehdr *ehdr,
	int nphdrs,
	caddr_t phdrbase,
	Phdr **uphdr,
	Phdr **dyphdr,
	Phdr **stphdr,
	Phdr **dtphdr,
	Phdr *dataphdrp,
	caddr_t *bssbase,
	caddr_t *brkbase,
	intptr_t *voffset,
	intptr_t *minaddr,
	size_t len,
	long *execsz,
	size_t *brksize)
{
	Phdr *phdr;
	int i, prot, error, lastprot = 0;
	caddr_t addr = NULL;
	size_t zfodsz;
	int ptload = 0;
	int page;
	off_t offset;
	int hsize = ehdr->e_phentsize;
	caddr_t mintmp = (caddr_t)-1;
	uintptr_t lastaddr = NULL;
	extern int use_brk_lpg;

	if (ehdr->e_type == ET_DYN) {
		caddr_t vaddr;

		/*
		 * Despite the fact that mmapobj(2) refuses to load them, we
		 * need to support executing ET_DYN objects that have a
		 * non-NULL p_vaddr.  When found in the wild, these objects
		 * are likely to be due to an old (and largely obviated) Linux
		 * facility, prelink(8), that rewrites shared objects to
		 * prefer specific (disjoint) virtual address ranges.  (Yes,
		 * this is putatively for performance -- and yes, it has
		 * limited applicability, many edge conditions and grisly
		 * failure modes; even for Linux, it's insane.)  As ELF
		 * mandates that the PT_LOAD segments be in p_vaddr order, we
		 * find the lowest p_vaddr by finding the first PT_LOAD
		 * segment.
		 */
		phdr = (Phdr *)phdrbase;
		for (i = nphdrs; i > 0; i--) {
			if (phdr->p_type == PT_LOAD) {
				addr = (caddr_t)(uintptr_t)phdr->p_vaddr;
				break;
			}
			phdr = (Phdr *)((caddr_t)phdr + hsize);
		}

		/*
		 * We have a non-zero p_vaddr in the first PT_LOAD segment --
		 * presumably because we're directly executing a prelink(8)'d
		 * ld-linux.so.  While we could correctly execute such an
		 * object without locating it at its desired p_vaddr (it is,
		 * after all, still relocatable), our inner antiquarian
		 * derives a perverse pleasure in accommodating the steampunk
		 * prelink(8) contraption -- goggles on!
		 */
		if ((vaddr = addr) != NULL) {
			if (as_gap(curproc->p_as, len,
			    &addr, &len, AH_LO, NULL) == -1 || addr != vaddr) {
				addr = NULL;
			}
		}

		if (addr == NULL) {
			/*
			 * We either have a NULL p_vaddr (the common case, by
			 * many orders of magnitude) or we have a non-NULL
			 * p_vaddr and we were unable to obtain the specified
			 * VA range (presumably because it's an illegal
			 * address).  Either way, obtain an address in which
			 * to map the interpreter.
			 */
			map_addr(&addr, len, (offset_t)0, 1, 0);
			if (addr == NULL)
				return (ENOMEM);
		}

		/*
		 * Our voffset is the difference between where we landed and
		 * where we wanted to be.
		 */
		*voffset = (uintptr_t)addr - (uintptr_t)vaddr;
	} else {
		*voffset = 0;
	}

	phdr = (Phdr *)phdrbase;
	for (i = nphdrs; i > 0; i--) {
		switch (phdr->p_type) {
		case PT_LOAD:
			ptload = 1;
			/* Translate the ELF PF_* flags to PROT_* bits. */
			prot = PROT_USER;
			if (phdr->p_flags & PF_R)
				prot |= PROT_READ;
			if (phdr->p_flags & PF_W)
				prot |= PROT_WRITE;
			if (phdr->p_flags & PF_X)
				prot |= PROT_EXEC;

			addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);

			if ((*dyphdr != NULL) && uphdr != NULL &&
			    (*uphdr == NULL)) {
				/*
				 * The PT_PHDR program header is, strictly
				 * speaking, optional.  If we find that this
				 * is missing, we will determine the location
				 * of the program headers based on the address
				 * of the lowest PT_LOAD segment (namely, this
				 * one): we subtract the p_offset to get to
				 * the ELF header and then add back the program
				 * header offset to get to the program headers.
				 * We then cons up a Phdr that corresponds to
				 * the (missing) PT_PHDR, setting the flags
				 * to 0 to denote that this is artificial and
				 * should (must) be freed by the caller.
				 */
				Phdr *cons;

				cons = kmem_zalloc(sizeof (Phdr), KM_SLEEP);

				cons->p_flags = 0;
				cons->p_type = PT_PHDR;
				cons->p_vaddr = ((uintptr_t)addr -
				    phdr->p_offset) + ehdr->e_phoff;

				*uphdr = cons;
			}

			/*
			 * Keep track of the segment with the lowest starting
			 * address.
			 */
			if (addr < mintmp)
				mintmp = addr;

			/*
			 * Segments need not correspond to page boundaries:
			 * they are permitted to share a page.  If two PT_LOAD
			 * segments share the same page, and the permissions
			 * of the segments differ, the behavior is historically
			 * that the permissions of the latter segment are used
			 * for the page that the two segments share.  This is
			 * also historically a non-issue: binaries generated
			 * by most anything will make sure that two PT_LOAD
			 * segments with differing permissions don't actually
			 * share any pages.  However, there exist some crazy
			 * things out there (including at least an obscure
			 * Portuguese teaching language called G-Portugol) that
			 * actually do the wrong thing and expect it to work:
			 * they have a segment with execute permission share
			 * a page with a subsequent segment that does not
			 * have execute permissions and expect the resulting
			 * shared page to in fact be executable.  To accommodate
			 * such broken link editors, we take advantage of a
			 * latitude explicitly granted to the loader: it is
			 * permitted to make _any_ PT_LOAD segment executable
			 * (provided that it is readable or writable).  If we
			 * see that we're sharing a page and that the previous
			 * page was executable, we will add execute permissions
			 * to our segment.
			 */
			if (btop(lastaddr) == btop((uintptr_t)addr) &&
			    (phdr->p_flags & (PF_R | PF_W)) &&
			    (lastprot & PROT_EXEC)) {
				prot |= PROT_EXEC;
			}

			lastaddr = (uintptr_t)addr + phdr->p_filesz;
			lastprot = prot;

			/* Zero-fill (bss) portion beyond the file image. */
			zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;

			/*
			 * We can only map the file's pages directly when the
			 * segment's file offset and virtual address share
			 * page alignment and the vnode supports mapping.
			 */
			offset = phdr->p_offset;
			if (((uintptr_t)offset & PAGEOFFSET) ==
			    ((uintptr_t)addr & PAGEOFFSET) &&
			    (!(vp->v_flag & VNOMAP))) {
				page = 1;
			} else {
				page = 0;
			}

			/*
			 * Set the heap pagesize for OOB when the bss size
			 * is known and use_brk_lpg is not 0.
			 */
			if (brksize != NULL && use_brk_lpg &&
			    zfodsz != 0 && phdr == dataphdrp &&
			    (prot & PROT_WRITE)) {
				size_t tlen = P2NPHASE((uintptr_t)addr +
				    phdr->p_filesz, PAGESIZE);

				if (zfodsz > tlen) {
					curproc->p_brkpageszc =
					    page_szc(map_pgsz(MAPPGSZ_HEAP,
					    curproc, addr + phdr->p_filesz +
					    tlen, zfodsz - tlen, 0));
				}
			}

			if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
			    (prot & PROT_WRITE)) {
				uint_t szc = curproc->p_brkpageszc;
				size_t pgsz = page_get_pagesize(szc);
				caddr_t ebss = addr + phdr->p_memsz;
				size_t extra_zfodsz;

				ASSERT(pgsz > PAGESIZE);

				/*
				 * Pad the bss out to the large-page boundary
				 * so the heap can start on a large page.
				 */
				extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);

				if (error = execmap(vp, addr, phdr->p_filesz,
				    zfodsz + extra_zfodsz, phdr->p_offset,
				    prot, page, szc))
					goto bad;
				if (brksize != NULL)
					*brksize = extra_zfodsz;
			} else {
				if (error = execmap(vp, addr, phdr->p_filesz,
				    zfodsz, phdr->p_offset, prot, page, 0))
					goto bad;
			}

			if (bssbase != NULL && addr >= *bssbase &&
			    phdr == dataphdrp) {
				*bssbase = addr + phdr->p_filesz;
			}
			if (brkbase != NULL && addr >= *brkbase) {
				*brkbase = addr + phdr->p_memsz;
			}

			*execsz += btopr(phdr->p_memsz);
			break;

		case PT_INTERP:
			/*
			 * The ELF specification is unequivocal about the
			 * PT_INTERP program header with respect to any PT_LOAD
			 * program header:  "If it is present, it must precede
			 * any loadable segment entry."  Linux, however, makes
			 * no attempt to enforce this -- which has allowed some
			 * binary editing tools to get away with generating
			 * invalid ELF binaries in the respect that PT_INTERP
			 * occurs after the first PT_LOAD program header.  This
			 * is unfortunate (and of course, disappointing) but
			 * it's no worse than that:  there is no reason that we
			 * can't process the PT_INTERP entry (if present) after
			 * one or more PT_LOAD entries.  We therefore
			 * deliberately do not check ptload here and always
			 * store dyphdr to be the PT_INTERP program header.
			 */
			*dyphdr = phdr;
			break;

		case PT_SHLIB:
			*stphdr = phdr;
			break;

		case PT_PHDR:
			/*
			 * A PT_PHDR after a PT_LOAD, or one with zero flags
			 * (our marker for a synthesized entry), is invalid.
			 */
			if (ptload || phdr->p_flags == 0)
				goto bad;

			if (uphdr != NULL)
				*uphdr = phdr;

			break;

		case PT_NULL:
		case PT_DYNAMIC:
		case PT_NOTE:
			break;

		case PT_SUNWDTRACE:
			if (dtphdr != NULL)
				*dtphdr = phdr;
			break;

		default:
			break;
		}
		phdr = (Phdr *)((caddr_t)phdr + hsize);
	}

	if (minaddr != NULL) {
		ASSERT(mintmp != (caddr_t)-1);
		*minaddr = (intptr_t)mintmp;
	}

	return (0);
bad:
	if (error == 0)
		error = EINVAL;
	return (error);
}
1691 1690
1692 1691 int
1693 1692 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1694 1693 rlim64_t rlimit, cred_t *credp)
1695 1694 {
1696 1695 Note note;
1697 1696 int error;
1698 1697
1699 1698 bzero(¬e, sizeof (note));
1700 1699 bcopy("CORE", note.name, 4);
1701 1700 note.nhdr.n_type = type;
1702 1701 /*
1703 1702 * The System V ABI states that n_namesz must be the length of the
1704 1703 * string that follows the Nhdr structure including the terminating
1705 1704 * null. The ABI also specifies that sufficient padding should be
1706 1705 * included so that the description that follows the name string
1707 1706 * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
1708 1707 * respectively. However, since this change was not made correctly
1709 1708 * at the time of the 64-bit port, both 32- and 64-bit binaries
1710 1709 * descriptions are only guaranteed to begin on a 4-byte boundary.
1711 1710 */
1712 1711 note.nhdr.n_namesz = 5;
1713 1712 note.nhdr.n_descsz = roundup(descsz, sizeof (Word));
1714 1713
1715 1714 if (error = core_write(vp, UIO_SYSSPACE, *offsetp, ¬e,
1716 1715 sizeof (note), rlimit, credp))
1717 1716 return (error);
1718 1717
1719 1718 *offsetp += sizeof (note);
1720 1719
1721 1720 if (error = core_write(vp, UIO_SYSSPACE, *offsetp, desc,
1722 1721 note.nhdr.n_descsz, rlimit, credp))
1723 1722 return (error);
1724 1723
1725 1724 *offsetp += note.nhdr.n_descsz;
1726 1725 return (0);
1727 1726 }
1728 1727
1729 1728 /*
1730 1729 * Copy the section data from one vnode to the section of another vnode.
1731 1730 */
1732 1731 static void
1733 1732 copy_scn(Shdr *src, vnode_t *src_vp, Shdr *dst, vnode_t *dst_vp, Off *doffset,
1734 1733 void *buf, size_t size, cred_t *credp, rlim64_t rlimit)
1735 1734 {
1736 1735 ssize_t resid;
1737 1736 size_t len, n = src->sh_size;
1738 1737 offset_t off = 0;
1739 1738
1740 1739 while (n != 0) {
1741 1740 len = MIN(size, n);
1742 1741 if (vn_rdwr(UIO_READ, src_vp, buf, len, src->sh_offset + off,
1743 1742 UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid) != 0 ||
1744 1743 resid >= len ||
1745 1744 core_write(dst_vp, UIO_SYSSPACE, *doffset + off,
1746 1745 buf, len - resid, rlimit, credp) != 0) {
1747 1746 dst->sh_size = 0;
1748 1747 dst->sh_offset = 0;
1749 1748 return;
1750 1749 }
1751 1750
1752 1751 ASSERT(n >= len - resid);
1753 1752
1754 1753 n -= len - resid;
1755 1754 off += len - resid;
1756 1755 }
1757 1756
1758 1757 *doffset += src->sh_size;
1759 1758 }
1760 1759
1761 1760 #ifdef _ELF32_COMPAT
1762 1761 extern size_t elf_datasz_max;
1763 1762 #else
1764 1763 size_t elf_datasz_max = 1 * 1024 * 1024;
1765 1764 #endif
1766 1765
1767 1766 /*
1768 1767 * This function processes mappings that correspond to load objects to
1769 1768 * examine their respective sections for elfcore(). It's called once with
1770 1769 * v set to NULL to count the number of sections that we're going to need
1771 1770 * and then again with v set to some allocated buffer that we fill in with
1772 1771 * all the section data.
1773 1772 */
1774 1773 static int
1775 1774 process_scns(core_content_t content, proc_t *p, cred_t *credp, vnode_t *vp,
1776 1775 Shdr *v, int nv, rlim64_t rlimit, Off *doffsetp, int *nshdrsp)
1777 1776 {
1778 1777 vnode_t *lastvp = NULL;
1779 1778 struct seg *seg;
1780 1779 int i, j;
1781 1780 void *data = NULL;
1782 1781 size_t datasz = 0;
1783 1782 shstrtab_t shstrtab;
1784 1783 struct as *as = p->p_as;
1785 1784 int error = 0;
1786 1785
1787 1786 if (v != NULL)
1788 1787 shstrtab_init(&shstrtab);
1789 1788
1790 1789 i = 1;
1791 1790 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1792 1791 uint_t prot;
1793 1792 vnode_t *mvp;
1794 1793 void *tmp = NULL;
1795 1794 caddr_t saddr = seg->s_base;
1796 1795 caddr_t naddr;
1797 1796 caddr_t eaddr;
1798 1797 size_t segsize;
1799 1798
1800 1799 Ehdr ehdr;
1801 1800 int nshdrs, shstrndx, nphdrs;
1802 1801 caddr_t shbase;
1803 1802 ssize_t shsize;
1804 1803 char *shstrbase;
1805 1804 ssize_t shstrsize;
1806 1805
1807 1806 Shdr *shdr;
1808 1807 const char *name;
1809 1808 size_t sz;
1810 1809 uintptr_t off;
1811 1810
1812 1811 int ctf_ndx = 0;
1813 1812 int symtab_ndx = 0;
1814 1813
1815 1814 /*
1816 1815 * Since we're just looking for text segments of load
1817 1816 * objects, we only care about the protection bits; we don't
1818 1817 * care about the actual size of the segment so we use the
1819 1818 * reserved size. If the segment's size is zero, there's
1820 1819 * something fishy going on so we ignore this segment.
1821 1820 */
1822 1821 if (seg->s_ops != &segvn_ops ||
1823 1822 SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
1824 1823 mvp == lastvp || mvp == NULL || mvp->v_type != VREG ||
1825 1824 (segsize = pr_getsegsize(seg, 1)) == 0)
1826 1825 continue;
1827 1826
1828 1827 eaddr = saddr + segsize;
1829 1828 prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr);
1830 1829 pr_getprot_done(&tmp);
1831 1830
1832 1831 /*
1833 1832 * Skip this segment unless the protection bits look like
1834 1833 * what we'd expect for a text segment.
1835 1834 */
1836 1835 if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)
1837 1836 continue;
1838 1837
1839 1838 if (getelfhead(mvp, credp, &ehdr, &nshdrs, &shstrndx,
1840 1839 &nphdrs) != 0 ||
1841 1840 getelfshdr(mvp, credp, &ehdr, nshdrs, shstrndx,
1842 1841 &shbase, &shsize, &shstrbase, &shstrsize) != 0)
1843 1842 continue;
1844 1843
1845 1844 off = ehdr.e_shentsize;
1846 1845 for (j = 1; j < nshdrs; j++, off += ehdr.e_shentsize) {
1847 1846 Shdr *symtab = NULL, *strtab;
1848 1847
1849 1848 shdr = (Shdr *)(shbase + off);
1850 1849
1851 1850 if (shdr->sh_name >= shstrsize)
1852 1851 continue;
1853 1852
1854 1853 name = shstrbase + shdr->sh_name;
1855 1854
1856 1855 if (strcmp(name, shstrtab_data[STR_CTF]) == 0) {
1857 1856 if ((content & CC_CONTENT_CTF) == 0 ||
1858 1857 ctf_ndx != 0)
1859 1858 continue;
1860 1859
1861 1860 if (shdr->sh_link > 0 &&
1862 1861 shdr->sh_link < nshdrs) {
1863 1862 symtab = (Shdr *)(shbase +
1864 1863 shdr->sh_link * ehdr.e_shentsize);
1865 1864 }
1866 1865
1867 1866 if (v != NULL && i < nv - 1) {
1868 1867 if (shdr->sh_size > datasz &&
1869 1868 shdr->sh_size <= elf_datasz_max) {
1870 1869 if (data != NULL)
1871 1870 kmem_free(data, datasz);
1872 1871
1873 1872 datasz = shdr->sh_size;
1874 1873 data = kmem_alloc(datasz,
1875 1874 KM_SLEEP);
1876 1875 }
1877 1876
1878 1877 v[i].sh_name = shstrtab_ndx(&shstrtab,
1879 1878 STR_CTF);
1880 1879 v[i].sh_addr = (Addr)(uintptr_t)saddr;
1881 1880 v[i].sh_type = SHT_PROGBITS;
1882 1881 v[i].sh_addralign = 4;
1883 1882 *doffsetp = roundup(*doffsetp,
1884 1883 v[i].sh_addralign);
1885 1884 v[i].sh_offset = *doffsetp;
1886 1885 v[i].sh_size = shdr->sh_size;
1887 1886 if (symtab == NULL) {
1888 1887 v[i].sh_link = 0;
1889 1888 } else if (symtab->sh_type ==
1890 1889 SHT_SYMTAB &&
1891 1890 symtab_ndx != 0) {
1892 1891 v[i].sh_link =
1893 1892 symtab_ndx;
1894 1893 } else {
1895 1894 v[i].sh_link = i + 1;
1896 1895 }
1897 1896
1898 1897 copy_scn(shdr, mvp, &v[i], vp,
1899 1898 doffsetp, data, datasz, credp,
1900 1899 rlimit);
1901 1900 }
1902 1901
1903 1902 ctf_ndx = i++;
1904 1903
1905 1904 /*
1906 1905 * We've already dumped the symtab.
1907 1906 */
1908 1907 if (symtab != NULL &&
1909 1908 symtab->sh_type == SHT_SYMTAB &&
1910 1909 symtab_ndx != 0)
1911 1910 continue;
1912 1911
1913 1912 } else if (strcmp(name,
1914 1913 shstrtab_data[STR_SYMTAB]) == 0) {
1915 1914 if ((content & CC_CONTENT_SYMTAB) == 0 ||
1916 1915 symtab != 0)
1917 1916 continue;
1918 1917
1919 1918 symtab = shdr;
1920 1919 }
1921 1920
1922 1921 if (symtab != NULL) {
1923 1922 if ((symtab->sh_type != SHT_DYNSYM &&
1924 1923 symtab->sh_type != SHT_SYMTAB) ||
1925 1924 symtab->sh_link == 0 ||
1926 1925 symtab->sh_link >= nshdrs)
1927 1926 continue;
1928 1927
1929 1928 strtab = (Shdr *)(shbase +
1930 1929 symtab->sh_link * ehdr.e_shentsize);
1931 1930
1932 1931 if (strtab->sh_type != SHT_STRTAB)
1933 1932 continue;
1934 1933
1935 1934 if (v != NULL && i < nv - 2) {
1936 1935 sz = MAX(symtab->sh_size,
1937 1936 strtab->sh_size);
1938 1937 if (sz > datasz &&
1939 1938 sz <= elf_datasz_max) {
1940 1939 if (data != NULL)
1941 1940 kmem_free(data, datasz);
1942 1941
1943 1942 datasz = sz;
1944 1943 data = kmem_alloc(datasz,
1945 1944 KM_SLEEP);
1946 1945 }
1947 1946
1948 1947 if (symtab->sh_type == SHT_DYNSYM) {
1949 1948 v[i].sh_name = shstrtab_ndx(
1950 1949 &shstrtab, STR_DYNSYM);
1951 1950 v[i + 1].sh_name = shstrtab_ndx(
1952 1951 &shstrtab, STR_DYNSTR);
1953 1952 } else {
1954 1953 v[i].sh_name = shstrtab_ndx(
1955 1954 &shstrtab, STR_SYMTAB);
1956 1955 v[i + 1].sh_name = shstrtab_ndx(
1957 1956 &shstrtab, STR_STRTAB);
1958 1957 }
1959 1958
1960 1959 v[i].sh_type = symtab->sh_type;
1961 1960 v[i].sh_addr = symtab->sh_addr;
1962 1961 if (ehdr.e_type == ET_DYN ||
1963 1962 v[i].sh_addr == 0)
1964 1963 v[i].sh_addr +=
1965 1964 (Addr)(uintptr_t)saddr;
1966 1965 v[i].sh_addralign =
1967 1966 symtab->sh_addralign;
1968 1967 *doffsetp = roundup(*doffsetp,
1969 1968 v[i].sh_addralign);
1970 1969 v[i].sh_offset = *doffsetp;
1971 1970 v[i].sh_size = symtab->sh_size;
1972 1971 v[i].sh_link = i + 1;
1973 1972 v[i].sh_entsize = symtab->sh_entsize;
1974 1973 v[i].sh_info = symtab->sh_info;
1975 1974
1976 1975 copy_scn(symtab, mvp, &v[i], vp,
1977 1976 doffsetp, data, datasz, credp,
1978 1977 rlimit);
1979 1978
1980 1979 v[i + 1].sh_type = SHT_STRTAB;
1981 1980 v[i + 1].sh_flags = SHF_STRINGS;
1982 1981 v[i + 1].sh_addr = symtab->sh_addr;
1983 1982 if (ehdr.e_type == ET_DYN ||
1984 1983 v[i + 1].sh_addr == 0)
1985 1984 v[i + 1].sh_addr +=
1986 1985 (Addr)(uintptr_t)saddr;
1987 1986 v[i + 1].sh_addralign =
1988 1987 strtab->sh_addralign;
1989 1988 *doffsetp = roundup(*doffsetp,
1990 1989 v[i + 1].sh_addralign);
1991 1990 v[i + 1].sh_offset = *doffsetp;
1992 1991 v[i + 1].sh_size = strtab->sh_size;
1993 1992
1994 1993 copy_scn(strtab, mvp, &v[i + 1], vp,
1995 1994 doffsetp, data, datasz, credp,
1996 1995 rlimit);
1997 1996 }
1998 1997
1999 1998 if (symtab->sh_type == SHT_SYMTAB)
2000 1999 symtab_ndx = i;
2001 2000 i += 2;
2002 2001 }
2003 2002 }
2004 2003
2005 2004 kmem_free(shstrbase, shstrsize);
2006 2005 kmem_free(shbase, shsize);
2007 2006
2008 2007 lastvp = mvp;
2009 2008 }
2010 2009
2011 2010 if (v == NULL) {
2012 2011 if (i == 1)
2013 2012 *nshdrsp = 0;
2014 2013 else
2015 2014 *nshdrsp = i + 1;
2016 2015 goto done;
2017 2016 }
2018 2017
2019 2018 if (i != nv - 1) {
2020 2019 cmn_err(CE_WARN, "elfcore: core dump failed for "
2021 2020 "process %d; address space is changing", p->p_pid);
2022 2021 error = EIO;
2023 2022 goto done;
2024 2023 }
2025 2024
2026 2025 v[i].sh_name = shstrtab_ndx(&shstrtab, STR_SHSTRTAB);
2027 2026 v[i].sh_size = shstrtab_size(&shstrtab);
2028 2027 v[i].sh_addralign = 1;
2029 2028 *doffsetp = roundup(*doffsetp, v[i].sh_addralign);
2030 2029 v[i].sh_offset = *doffsetp;
2031 2030 v[i].sh_flags = SHF_STRINGS;
2032 2031 v[i].sh_type = SHT_STRTAB;
2033 2032
2034 2033 if (v[i].sh_size > datasz) {
2035 2034 if (data != NULL)
2036 2035 kmem_free(data, datasz);
2037 2036
2038 2037 datasz = v[i].sh_size;
2039 2038 data = kmem_alloc(datasz,
2040 2039 KM_SLEEP);
2041 2040 }
2042 2041
2043 2042 shstrtab_dump(&shstrtab, data);
2044 2043
2045 2044 if ((error = core_write(vp, UIO_SYSSPACE, *doffsetp,
2046 2045 data, v[i].sh_size, rlimit, credp)) != 0)
2047 2046 goto done;
2048 2047
2049 2048 *doffsetp += v[i].sh_size;
2050 2049
2051 2050 done:
2052 2051 if (data != NULL)
2053 2052 kmem_free(data, datasz);
2054 2053
2055 2054 return (error);
2056 2055 }
2057 2056
/*
 * Write an ELF core file for process p to vnode vp.
 *
 * Layout written, in file order: the ELF header, the program header table,
 * two CORE note segments (the old SunOS note format followed by the current
 * one), the contents of every user mapping selected by the core-file
 * content mask, and -- when CTF/symtab content was requested or extended
 * numbering is needed -- a section header table at the end.
 *
 * If the number of mappings changes while we are working (the address
 * space can change between the counting pass and the writing pass), we
 * retry exactly once from `top' before failing with EIO.
 *
 * Returns 0 on success or an errno value; a dump aborted by a signal is
 * still considered "written" (segments are flagged PF_SUNW_FAILURE etc.)
 * and can return 0.
 */
int
elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig,
    core_content_t content)
{
	offset_t poffset, soffset;	/* file offsets of phdrs and shdrs */
	Off doffset;			/* running data offset in the file */
	int error, i, nphdrs, nshdrs;
	int overflow = 0;
	struct seg *seg;
	struct as *as = p->p_as;
	/*
	 * One allocation serves three purposes in sequence: the ELF header,
	 * the program header table, and the section header table.
	 */
	union {
		Ehdr ehdr;
		Phdr phdr[1];
		Shdr shdr[1];
	} *bigwad;
	size_t bigsize;
	size_t phdrsz, shdrsz;
	Ehdr *ehdr;
	Phdr *v;
	caddr_t brkbase;
	size_t brksize;
	caddr_t stkbase;
	size_t stksize;
	int ntries = 0;
	klwp_t *lwp = ttolwp(curthread);

top:
	/*
	 * Make sure we have everything we need (registers, etc.).
	 * All other lwps have already stopped and are in an orderly state.
	 */
	ASSERT(p == ttoproc(curthread));
	prstop(0, 0);

	AS_LOCK_ENTER(as, RW_WRITER);
	nphdrs = prnsegs(as, 0) + 2; /* two CORE note sections */

	/*
	 * Count the number of section headers we're going to need.
	 */
	nshdrs = 0;
	if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) {
		(void) process_scns(content, p, credp, NULL, NULL, NULL, 0,
		    NULL, &nshdrs);
	}
	AS_LOCK_EXIT(as);

	/* process_scns() reports either no sections or data + shstrtab. */
	ASSERT(nshdrs == 0 || nshdrs > 1);

	/*
	 * The core file contents may require zero section headers, but if
	 * we overflow the 16 bits allotted to the program header count in
	 * the ELF header, we'll need that program header at index zero.
	 */
	if (nshdrs == 0 && nphdrs >= PN_XNUM)
		nshdrs = 1;

	phdrsz = nphdrs * sizeof (Phdr);
	shdrsz = nshdrs * sizeof (Shdr);

	bigsize = MAX(sizeof (*bigwad), MAX(phdrsz, shdrsz));
	bigwad = kmem_alloc(bigsize, KM_SLEEP);

	ehdr = &bigwad->ehdr;
	bzero(ehdr, sizeof (*ehdr));

	ehdr->e_ident[EI_MAG0] = ELFMAG0;
	ehdr->e_ident[EI_MAG1] = ELFMAG1;
	ehdr->e_ident[EI_MAG2] = ELFMAG2;
	ehdr->e_ident[EI_MAG3] = ELFMAG3;
	ehdr->e_ident[EI_CLASS] = ELFCLASS;
	ehdr->e_type = ET_CORE;

	/*
	 * Byte order and machine type depend on the data model this file
	 * is compiled for (this file is built twice on 64-bit kernels:
	 * natively and as _ELF32_COMPAT).
	 */
#if !defined(_LP64) || defined(_ELF32_COMPAT)

#if defined(__sparc)
	ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
	ehdr->e_machine = EM_SPARC;
#elif defined(__i386) || defined(__i386_COMPAT)
	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
	ehdr->e_machine = EM_386;
#else
#error "no recognized machine type is defined"
#endif

#else	/* !defined(_LP64) || defined(_ELF32_COMPAT) */

#if defined(__sparc)
	ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
	ehdr->e_machine = EM_SPARCV9;
#elif defined(__amd64)
	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
	ehdr->e_machine = EM_AMD64;
#else
#error "no recognized 64-bit machine type is defined"
#endif

#endif	/* !defined(_LP64) || defined(_ELF32_COMPAT) */

	/*
	 * If the count of program headers or section headers or the index
	 * of the section string table can't fit in the mere 16 bits
	 * shortsightedly allotted to them in the ELF header, we use the
	 * extended formats and put the real values in the section header
	 * as index 0.
	 */
	ehdr->e_version = EV_CURRENT;
	ehdr->e_ehsize = sizeof (Ehdr);

	if (nphdrs >= PN_XNUM)
		ehdr->e_phnum = PN_XNUM;
	else
		ehdr->e_phnum = (unsigned short)nphdrs;

	ehdr->e_phoff = sizeof (Ehdr);
	ehdr->e_phentsize = sizeof (Phdr);

	if (nshdrs > 0) {
		/* e_shnum of 0 means the real count is in shdr[0].sh_size */
		if (nshdrs >= SHN_LORESERVE)
			ehdr->e_shnum = 0;
		else
			ehdr->e_shnum = (unsigned short)nshdrs;

		/* shstrtab is always the last section header */
		if (nshdrs - 1 >= SHN_LORESERVE)
			ehdr->e_shstrndx = SHN_XINDEX;
		else
			ehdr->e_shstrndx = (unsigned short)(nshdrs - 1);

		ehdr->e_shoff = ehdr->e_phoff + ehdr->e_phentsize * nphdrs;
		ehdr->e_shentsize = sizeof (Shdr);
	}

	if (error = core_write(vp, UIO_SYSSPACE, (offset_t)0, ehdr,
	    sizeof (Ehdr), rlimit, credp))
		goto done;

	/* ehdr, then phdrs, then shdrs, then segment/section data */
	poffset = sizeof (Ehdr);
	soffset = sizeof (Ehdr) + phdrsz;
	doffset = sizeof (Ehdr) + phdrsz + shdrsz;

	v = &bigwad->phdr[0];
	bzero(v, phdrsz);

	/* Program headers 0 and 1 are the two CORE note segments. */
	setup_old_note_header(&v[0], p);
	v[0].p_offset = doffset = roundup(doffset, sizeof (Word));
	doffset += v[0].p_filesz;

	setup_note_header(&v[1], p);
	v[1].p_offset = doffset = roundup(doffset, sizeof (Word));
	doffset += v[1].p_filesz;

	/* Snapshot heap and stack extents under p_lock. */
	mutex_enter(&p->p_lock);

	brkbase = p->p_brkbase;
	brksize = p->p_brksize;

	stkbase = p->p_usrstack - p->p_stksize;
	stksize = p->p_stksize;

	mutex_exit(&p->p_lock);

	/*
	 * Walk the address space building one PT_LOAD program header per
	 * contiguous same-protection range of each segment.
	 */
	AS_LOCK_ENTER(as, RW_WRITER);
	i = 2;
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		extern struct seg_ops segspt_shmops;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			uint_t prot;
			size_t size;
			int type;
			vnode_t *mvp;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
			if ((size = (size_t)(naddr - saddr)) == 0)
				continue;
			if (i == nphdrs) {
				/*
				 * The address space grew since we counted;
				 * remember so we can retry from `top'.
				 */
				overflow++;
				continue;
			}
			v[i].p_type = PT_LOAD;
			v[i].p_vaddr = (Addr)(uintptr_t)saddr;
			v[i].p_memsz = size;
			if (prot & PROT_READ)
				v[i].p_flags |= PF_R;
			if (prot & PROT_WRITE)
				v[i].p_flags |= PF_W;
			if (prot & PROT_EXEC)
				v[i].p_flags |= PF_X;

			/*
			 * Figure out which mappings to include in the core.
			 * Excluded mappings keep their program header (so
			 * the address-space layout is complete) but carry
			 * p_filesz == 0, i.e. no data in the file.
			 */
			type = SEGOP_GETTYPE(seg, saddr);

			if (saddr == stkbase && size == stksize) {
				if (!(content & CC_CONTENT_STACK))
					goto exclude;

			} else if (saddr == brkbase && size == brksize) {
				if (!(content & CC_CONTENT_HEAP))
					goto exclude;

			} else if (seg->s_ops == &segspt_shmops) {
				/* SysV shared memory: DISM vs. ISM */
				if (type & MAP_NORESERVE) {
					if (!(content & CC_CONTENT_DISM))
						goto exclude;
				} else {
					if (!(content & CC_CONTENT_ISM))
						goto exclude;
				}

			} else if (seg->s_ops != &segvn_ops) {
				goto exclude;

			} else if (type & MAP_SHARED) {
				if (shmgetid(p, saddr) != SHMID_NONE) {
					if (!(content & CC_CONTENT_SHM))
						goto exclude;

				} else if (SEGOP_GETVP(seg, seg->s_base,
				    &mvp) != 0 || mvp == NULL ||
				    mvp->v_type != VREG) {
					if (!(content & CC_CONTENT_SHANON))
						goto exclude;

				} else {
					if (!(content & CC_CONTENT_SHFILE))
						goto exclude;
				}

			} else if (SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
			    mvp == NULL || mvp->v_type != VREG) {
				if (!(content & CC_CONTENT_ANON))
					goto exclude;

			} else if (prot == (PROT_READ | PROT_EXEC)) {
				if (!(content & CC_CONTENT_TEXT))
					goto exclude;

			} else if (prot == PROT_READ) {
				if (!(content & CC_CONTENT_RODATA))
					goto exclude;

			} else {
				if (!(content & CC_CONTENT_DATA))
					goto exclude;
			}

			doffset = roundup(doffset, sizeof (Word));
			v[i].p_offset = doffset;
			v[i].p_filesz = size;
			doffset += size;
exclude:
			i++;
		}
		ASSERT(tmp == NULL);
	}
	AS_LOCK_EXIT(as);

	if (overflow || i != nphdrs) {
		/* The address space changed underneath us; retry once. */
		if (ntries++ == 0) {
			kmem_free(bigwad, bigsize);
			overflow = 0;
			goto top;
		}
		cmn_err(CE_WARN, "elfcore: core dump failed for "
		    "process %d; address space is changing", p->p_pid);
		error = EIO;
		goto done;
	}

	if ((error = core_write(vp, UIO_SYSSPACE, poffset,
	    v, phdrsz, rlimit, credp)) != 0)
		goto done;

	if ((error = write_old_elfnotes(p, sig, vp, v[0].p_offset, rlimit,
	    credp)) != 0)
		goto done;

	if ((error = write_elfnotes(p, sig, vp, v[1].p_offset, rlimit,
	    credp, content)) != 0)
		goto done;

	/* Dump the contents of each included PT_LOAD segment. */
	for (i = 2; i < nphdrs; i++) {
		prkillinfo_t killinfo;
		sigqueue_t *sq;
		int sig, j;	/* deliberately shadows the sig parameter */

		if (v[i].p_filesz == 0)
			continue;

		/*
		 * If dumping out this segment fails, rather than failing
		 * the core dump entirely, we reset the size of the mapping
		 * to zero to indicate that the data is absent from the core
		 * file and OR in the PF_SUNW_FAILURE flag to differentiate
		 * this from mappings that were excluded due to the core file
		 * content settings.
		 */
		if ((error = core_seg(p, vp, v[i].p_offset,
		    (caddr_t)(uintptr_t)v[i].p_vaddr, v[i].p_filesz,
		    rlimit, credp)) == 0) {
			continue;
		}

		if ((sig = lwp->lwp_cursig) == 0) {
			/*
			 * We failed due to something other than a signal.
			 * Since the space reserved for the segment is now
			 * unused, we stash the errno in the first four
			 * bytes. This undocumented interface will let us
			 * understand the nature of the failure.
			 */
			(void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
			    &error, sizeof (error), rlimit, credp);

			v[i].p_filesz = 0;
			v[i].p_flags |= PF_SUNW_FAILURE;
			if ((error = core_write(vp, UIO_SYSSPACE,
			    poffset + sizeof (v[i]) * i, &v[i], sizeof (v[i]),
			    rlimit, credp)) != 0)
				goto done;

			continue;
		}

		/*
		 * We took a signal. We want to abort the dump entirely, but
		 * we also want to indicate what failed and why. We therefore
		 * use the space reserved for the first failing segment to
		 * write our error (which, for purposes of compatibility with
		 * older core dump readers, we set to EINTR) followed by any
		 * siginfo associated with the signal.
		 */
		bzero(&killinfo, sizeof (killinfo));
		killinfo.prk_error = EINTR;

		sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo;

		if (sq != NULL) {
			bcopy(&sq->sq_info, &killinfo.prk_info,
			    sizeof (sq->sq_info));
		} else {
			killinfo.prk_info.si_signo = lwp->lwp_cursig;
			killinfo.prk_info.si_code = SI_NOINFO;
		}

#if (defined(_SYSCALL32_IMPL) || defined(_LP64))
		/*
		 * If this is a 32-bit process, we need to translate from the
		 * native siginfo to the 32-bit variant. (Core readers must
		 * always have the same data model as their target or must
		 * be aware of -- and compensate for -- data model differences.)
		 */
		if (curproc->p_model == DATAMODEL_ILP32) {
			siginfo32_t si32;

			siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32);
			bcopy(&si32, &killinfo.prk_info, sizeof (si32));
		}
#endif

		(void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
		    &killinfo, sizeof (killinfo), rlimit, credp);

		/*
		 * For the segment on which we took the signal, indicate that
		 * its data now refers to a siginfo.
		 */
		v[i].p_filesz = 0;
		v[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED |
		    PF_SUNW_SIGINFO;

		/*
		 * And for every other segment, indicate that its absence
		 * is due to a signal.
		 */
		for (j = i + 1; j < nphdrs; j++) {
			v[j].p_filesz = 0;
			v[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED;
		}

		/*
		 * Finally, write out our modified program headers.
		 */
		if ((error = core_write(vp, UIO_SYSSPACE,
		    poffset + sizeof (v[i]) * i, &v[i],
		    sizeof (v[i]) * (nphdrs - i), rlimit, credp)) != 0)
			goto done;

		break;
	}

	if (nshdrs > 0) {
		bzero(&bigwad->shdr[0], shdrsz);

		/*
		 * Section header 0 carries the true counts when they
		 * overflow the 16-bit fields in the ELF header.
		 */
		if (nshdrs >= SHN_LORESERVE)
			bigwad->shdr[0].sh_size = nshdrs;

		if (nshdrs - 1 >= SHN_LORESERVE)
			bigwad->shdr[0].sh_link = nshdrs - 1;

		if (nphdrs >= PN_XNUM)
			bigwad->shdr[0].sh_info = nphdrs;

		if (nshdrs > 1) {
			AS_LOCK_ENTER(as, RW_WRITER);
			if ((error = process_scns(content, p, credp, vp,
			    &bigwad->shdr[0], nshdrs, rlimit, &doffset,
			    NULL)) != 0) {
				AS_LOCK_EXIT(as);
				goto done;
			}
			AS_LOCK_EXIT(as);
		}

		if ((error = core_write(vp, UIO_SYSSPACE, soffset,
		    &bigwad->shdr[0], shdrsz, rlimit, credp)) != 0)
			goto done;
	}

done:
	kmem_free(bigwad, bigsize);
	return (error);
}
2487 2486
2488 2487 #ifndef _ELF32_COMPAT
2489 2488
/*
 * Exec switch entry for native ELF executables: the magic string to
 * match, followed by the exec and core dump handlers for this format.
 * NOTE(review): the positional initializers 0 and 5 presumably fill the
 * magic-offset/length style fields of struct execsw -- confirm against
 * the struct definition in <sys/exec.h>.
 */
static struct execsw esw = {
#ifdef	_LP64
	elf64magicstr,
#else	/* _LP64 */
	elf32magicstr,
#endif	/* _LP64 */
	0,
	5,
	elfexec,
	elfcore
};
2501 2500
/* Module linkage for the native ELF exec switch entry above. */
static struct modlexec modlexec = {
	&mod_execops, "exec module for elf", &esw
};
2505 2504
2506 2505 #ifdef _LP64
2507 2506 extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
2508 2507 intpdata_t *idatap, int level, long *execsz,
2509 2508 int setid, caddr_t exec_file, cred_t *cred,
2510 2509 int *brand_action);
2511 2510 extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
2512 2511 rlim64_t rlimit, int sig, core_content_t content);
2513 2512
/*
 * Exec switch entry for 32-bit ELF executables on a 64-bit kernel,
 * mirroring esw but dispatching to the elf32 handlers declared above.
 */
static struct execsw esw32 = {
	elf32magicstr,
	0,
	5,
	elf32exec,
	elf32core
};
2521 2520
/* Module linkage for the 32-bit compatibility exec switch entry. */
static struct modlexec modlexec32 = {
	&mod_execops, "32-bit exec module for elf", &esw32
};
2525 2524 #endif /* _LP64 */
2526 2525
/*
 * Overall module linkage: the native exec module always, plus the
 * 32-bit compatibility exec module on 64-bit kernels.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlexec,
#ifdef _LP64
	(void *)&modlexec32,
#endif /* _LP64 */
	NULL
};
2535 2534
2536 2535 int
2537 2536 _init(void)
2538 2537 {
2539 2538 return (mod_install(&modlinkage));
2540 2539 }
2541 2540
2542 2541 int
2543 2542 _fini(void)
2544 2543 {
2545 2544 return (mod_remove(&modlinkage));
2546 2545 }
2547 2546
2548 2547 int
2549 2548 _info(struct modinfo *modinfop)
2550 2549 {
2551 2550 return (mod_info(&modlinkage, modinfop));
2552 2551 }
2553 2552
2554 2553 #endif /* !_ELF32_COMPAT */
|
↓ open down ↓ |
1655 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX