Print this page
Merge hell test
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/exec/elf/elf.c
+++ new/usr/src/uts/common/exec/elf/elf.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
|
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28 /*
29 29 * Copyright 2019 Joyent, Inc.
30 + * Copyright 2021 Oxide Computer Company
30 31 */
31 32
32 33 #include <sys/types.h>
33 34 #include <sys/param.h>
34 35 #include <sys/thread.h>
35 36 #include <sys/sysmacros.h>
36 37 #include <sys/signal.h>
37 38 #include <sys/cred.h>
38 39 #include <sys/user.h>
39 40 #include <sys/errno.h>
40 41 #include <sys/vnode.h>
41 42 #include <sys/mman.h>
42 43 #include <sys/kmem.h>
43 44 #include <sys/proc.h>
44 45 #include <sys/pathname.h>
45 46 #include <sys/policy.h>
46 47 #include <sys/cmn_err.h>
47 48 #include <sys/systm.h>
48 49 #include <sys/elf.h>
49 50 #include <sys/vmsystm.h>
50 51 #include <sys/debug.h>
51 52 #include <sys/auxv.h>
52 53 #include <sys/exec.h>
53 54 #include <sys/prsystm.h>
54 55 #include <vm/as.h>
55 56 #include <vm/rm.h>
56 57 #include <vm/seg.h>
57 58 #include <vm/seg_vn.h>
58 59 #include <sys/modctl.h>
59 60 #include <sys/systeminfo.h>
60 61 #include <sys/vmparam.h>
61 62 #include <sys/machelf.h>
62 63 #include <sys/shm_impl.h>
63 64 #include <sys/archsystm.h>
64 65 #include <sys/fasttrap.h>
65 66 #include <sys/brand.h>
66 67 #include "elf_impl.h"
67 68 #include <sys/sdt.h>
68 69 #include <sys/siginfo.h>
69 70 #include <sys/random.h>
70 71
71 72 #if defined(__x86)
72 73 #include <sys/comm_page_util.h>
73 74 #include <sys/fp.h>
74 75 #endif /* defined(__x86) */
75 76
76 77
77 78 extern int at_flags;
78 79 extern volatile size_t aslr_max_brk_skew;
79 80
80 81 #define ORIGIN_STR "ORIGIN"
81 82 #define ORIGIN_STR_SIZE 6
82 83
83 84 static int getelfhead(vnode_t *, cred_t *, Ehdr *, uint_t *, uint_t *,
84 85 uint_t *);
85 86 static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, uint_t, caddr_t *,
86 87 size_t *);
|
↓ open down ↓ |
47 lines elided |
↑ open up ↑ |
87 88 static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, uint_t, uint_t,
88 89 caddr_t *, size_t *, caddr_t *, size_t *);
89 90 static size_t elfsize(const Ehdr *, uint_t, const caddr_t, uintptr_t *);
90 91 static int mapelfexec(vnode_t *, Ehdr *, uint_t, caddr_t, Phdr **, Phdr **,
91 92 Phdr **, Phdr **, Phdr *, caddr_t *, caddr_t *, intptr_t *, uintptr_t *,
92 93 size_t, size_t *, size_t *);
93 94
94 95 #ifdef _ELF32_COMPAT
95 96 /* Link against the non-compat instances when compiling the 32-bit version. */
96 97 extern size_t elf_datasz_max;
98 +extern size_t elf_zeropg_sz;
97 99 extern void elf_ctx_resize_scratch(elf_core_ctx_t *, size_t);
98 100 extern uint_t elf_nphdr_max;
99 101 extern uint_t elf_nshdr_max;
100 102 extern size_t elf_shstrtab_max;
101 103 #else
102 104 size_t elf_datasz_max = 1 * 1024 * 1024; /* 1 MiB; upper bound applied by elf_ctx_resize_scratch() */
105 +size_t elf_zeropg_sz = 4 * 1024; /* 4 KiB; NOTE(review): no user visible in this part of the file - confirm purpose */
103 106 uint_t elf_nphdr_max = 1000; /* presumably max program headers accepted - confirm against getelfhead() */
104 107 uint_t elf_nshdr_max = 10000; /* presumably max section headers accepted - confirm against getelfhead() */
105 108 size_t elf_shstrtab_max = 100 * 1024; /* 100 KiB; presumably max section string table size - confirm */
106 109 #endif
107 110
108 111
109 112
110 113 typedef enum { /* index into shstrtab_data[] and shstrtab_t.sst_ndx[] */
111 114 STR_CTF,
112 115 STR_SYMTAB,
113 116 STR_DYNSYM,
114 117 STR_STRTAB,
115 118 STR_DYNSTR,
116 119 STR_SHSTRTAB,
117 120 STR_NUM /* number of string types; sizes shstrtab_t.sst_ndx[] */
118 121 } shstrtype_t;
119 122
120 123 static const char *shstrtab_data[] = { /* section names, indexed by shstrtype_t; order must match the enum */
121 124 ".SUNW_ctf",
122 125 ".symtab",
123 126 ".dynsym",
124 127 ".strtab",
125 128 ".dynstr",
126 129 ".shstrtab"
127 130 };
128 131
129 132 typedef struct shstrtab { /* builder state for a section-name string table */
130 133 uint_t sst_ndx[STR_NUM]; /* offset assigned to each name; 0 means not yet assigned */
131 134 uint_t sst_cur; /* next free offset, i.e. the table size so far */
132 135 } shstrtab_t;
133 136
134 137 static void
135 138 shstrtab_init(shstrtab_t *s)
136 139 {
137 140 bzero(&s->sst_ndx, sizeof (s->sst_ndx));
138 141 s->sst_cur = 1;
139 142 }
140 143
141 144 static uint_t
142 145 shstrtab_ndx(shstrtab_t *s, shstrtype_t type)
143 146 {
144 147 uint_t ret;
145 148
146 149 if ((ret = s->sst_ndx[type]) != 0)
147 150 return (ret);
148 151
149 152 ret = s->sst_ndx[type] = s->sst_cur;
150 153 s->sst_cur += strlen(shstrtab_data[type]) + 1;
151 154
152 155 return (ret);
153 156 }
154 157
155 158 static size_t
156 159 shstrtab_size(const shstrtab_t *s)
157 160 {
158 161 return (s->sst_cur);
159 162 }
160 163
161 164 static void
162 165 shstrtab_dump(const shstrtab_t *s, char *buf)
163 166 {
164 167 uint_t i, ndx;
165 168
166 169 *buf = '\0';
167 170 for (i = 0; i < STR_NUM; i++) {
168 171 if ((ndx = s->sst_ndx[i]) != 0)
169 172 (void) strcpy(buf + ndx, shstrtab_data[i]);
170 173 }
171 174 }
172 175
173 176 static int
174 177 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
175 178 {
176 179 ASSERT(phdrp->p_type == PT_SUNWDTRACE);
177 180
178 181 /*
179 182 * See the comment in fasttrap.h for information on how to safely
180 183 * update this program header.
181 184 */
182 185 if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
183 186 (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
184 187 return (-1);
185 188
186 189 args->thrptr = phdrp->p_vaddr + base;
187 190
188 191 return (0);
189 192 }
190 193
191 194 static int
192 195 handle_secflag_dt(proc_t *p, uint_t dt, uint_t val)
193 196 {
194 197 uint_t flag;
195 198
196 199 switch (dt) {
197 200 case DT_SUNW_ASLR:
198 201 flag = PROC_SEC_ASLR;
199 202 break;
200 203 default:
201 204 return (EINVAL);
202 205 }
203 206
204 207 if (val == 0) {
205 208 if (secflag_isset(p->p_secflags.psf_lower, flag))
206 209 return (EPERM);
207 210 if ((secpolicy_psecflags(CRED(), p, p) != 0) &&
208 211 secflag_isset(p->p_secflags.psf_inherit, flag))
209 212 return (EPERM);
210 213
211 214 secflag_clear(&p->p_secflags.psf_effective, flag);
212 215 } else {
213 216 if (!secflag_isset(p->p_secflags.psf_upper, flag))
214 217 return (EPERM);
215 218
216 219 if ((secpolicy_psecflags(CRED(), p, p) != 0) &&
217 220 !secflag_isset(p->p_secflags.psf_inherit, flag))
218 221 return (EPERM);
219 222
220 223 secflag_set(&p->p_secflags.psf_effective, flag);
221 224 }
222 225
223 226 return (0);
224 227 }
225 228
226 229
227 230 #ifndef _ELF32_COMPAT
228 231 void
229 232 elf_ctx_resize_scratch(elf_core_ctx_t *ctx, size_t sz)
230 233 {
231 234 size_t target = MIN(sz, elf_datasz_max);
232 235
233 236 if (target > ctx->ecc_bufsz) {
234 237 if (ctx->ecc_buf != NULL) {
235 238 kmem_free(ctx->ecc_buf, ctx->ecc_bufsz);
236 239 }
237 240 ctx->ecc_buf = kmem_alloc(target, KM_SLEEP);
238 241 ctx->ecc_bufsz = target;
239 242 }
240 243 }
241 244 #endif /* _ELF32_COMPAT */
242 245
243 246 /*
244 247 * Map in the executable pointed to by vp. Returns 0 on success. Note that
245 248 * this function currently has the maximum number of arguments allowed by
246 249 * modstubs on x86 (MAXNARG)! Do _not_ add to this function signature without
247 250 * adding to MAXNARG. (Better yet, do not add to this monster of a function
248 251 * signature!)
249 252 */
250 253 int
251 254 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
252 255 intptr_t *voffset, caddr_t exec_file, char **interpp, caddr_t *bssbase,
253 256 caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap, uintptr_t *minaddrp)
254 257 {
255 258 size_t len, phdrsize;
256 259 struct vattr vat;
257 260 caddr_t phdrbase = NULL;
258 261 uint_t nshdrs, shstrndx, nphdrs;
259 262 int error = 0;
260 263 Phdr *uphdr = NULL;
261 264 Phdr *junk = NULL;
262 265 Phdr *dynphdr = NULL;
263 266 Phdr *dtrphdr = NULL;
264 267 char *interp = NULL;
265 268 uintptr_t lddata, minaddr;
266 269 size_t execsz;
267 270
268 271 if (lddatap != NULL)
269 272 *lddatap = 0;
270 273
271 274 if (minaddrp != NULL)
272 275 *minaddrp = (uintptr_t)NULL;
273 276
274 277 if (error = execpermissions(vp, &vat, args)) {
275 278 uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
276 279 return (error);
277 280 }
278 281
279 282 if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
280 283 &nphdrs)) != 0 ||
281 284 (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
282 285 &phdrsize)) != 0) {
283 286 uprintf("%s: Cannot read %s\n", exec_file, args->pathname);
284 287 return (error);
285 288 }
286 289
287 290 if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
288 291 uprintf("%s: Nothing to load in %s", exec_file, args->pathname);
289 292 kmem_free(phdrbase, phdrsize);
290 293 return (ENOEXEC);
291 294 }
292 295 if (lddatap != NULL)
293 296 *lddatap = lddata;
294 297
295 298 if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
296 299 &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
297 300 len, &execsz, brksize)) {
298 301 uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
299 302 if (uphdr != NULL && uphdr->p_flags == 0)
300 303 kmem_free(uphdr, sizeof (Phdr));
301 304 kmem_free(phdrbase, phdrsize);
302 305 return (error);
303 306 }
304 307
305 308 if (minaddrp != NULL)
306 309 *minaddrp = minaddr;
307 310
308 311 /*
309 312 * If the executable requires an interpreter, determine its name.
310 313 */
311 314 if (dynphdr != NULL) {
312 315 ssize_t resid;
313 316
314 317 if (dynphdr->p_filesz > MAXPATHLEN || dynphdr->p_filesz == 0) {
315 318 uprintf("%s: Invalid interpreter\n", exec_file);
316 319 kmem_free(phdrbase, phdrsize);
317 320 return (ENOEXEC);
318 321 }
319 322
320 323 interp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
321 324
322 325 if ((error = vn_rdwr(UIO_READ, vp, interp,
323 326 (ssize_t)dynphdr->p_filesz,
324 327 (offset_t)dynphdr->p_offset, UIO_SYSSPACE, 0,
325 328 (rlim64_t)0, CRED(), &resid)) != 0 || resid != 0 ||
326 329 interp[dynphdr->p_filesz - 1] != '\0') {
327 330 uprintf("%s: Cannot obtain interpreter pathname\n",
328 331 exec_file);
329 332 kmem_free(interp, MAXPATHLEN);
330 333 kmem_free(phdrbase, phdrsize);
331 334 return (error != 0 ? error : ENOEXEC);
332 335 }
333 336 }
334 337
335 338 /*
336 339 * If this is a statically linked executable, voffset should indicate
337 340 * the address of the executable itself (it normally holds the address
338 341 * of the interpreter).
339 342 */
340 343 if (ehdr->e_type == ET_EXEC && interp == NULL)
341 344 *voffset = minaddr;
342 345
343 346 /*
344 347 * If the caller has asked for the interpreter name, return it (it's
345 348 * up to the caller to free it); if the caller hasn't asked for it,
346 349 * free it ourselves.
347 350 */
348 351 if (interpp != NULL) {
349 352 *interpp = interp;
350 353 } else if (interp != NULL) {
351 354 kmem_free(interp, MAXPATHLEN);
352 355 }
353 356
354 357 if (uphdr != NULL) {
355 358 *uphdr_vaddr = uphdr->p_vaddr;
356 359
357 360 if (uphdr->p_flags == 0)
358 361 kmem_free(uphdr, sizeof (Phdr));
359 362 } else if (ehdr->e_type == ET_DYN) {
360 363 /*
361 364 * If we don't have a uphdr, we'll apply the logic found
362 365 * in mapelfexec() and use the p_vaddr of the first PT_LOAD
363 366 * section as the base address of the object.
364 367 */
365 368 const Phdr *phdr = (Phdr *)phdrbase;
366 369 const uint_t hsize = ehdr->e_phentsize;
367 370 uint_t i;
368 371
369 372 for (i = nphdrs; i > 0; i--) {
370 373 if (phdr->p_type == PT_LOAD) {
371 374 *uphdr_vaddr = (uintptr_t)phdr->p_vaddr +
372 375 ehdr->e_phoff;
373 376 break;
374 377 }
375 378
376 379 phdr = (Phdr *)((caddr_t)phdr + hsize);
377 380 }
378 381
379 382 /*
380 383 * If we don't have a PT_LOAD segment, we should have returned
381 384 * ENOEXEC when elfsize() returned 0, above.
382 385 */
383 386 VERIFY(i > 0);
384 387 } else {
385 388 *uphdr_vaddr = (Addr)-1;
386 389 }
387 390
388 391 kmem_free(phdrbase, phdrsize);
389 392 return (error);
390 393 }
391 394
392 395 /*ARGSUSED*/
393 396 int
394 397 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
395 398 int level, size_t *execsz, int setid, caddr_t exec_file, cred_t *cred,
396 399 int *brand_action)
397 400 {
398 401 caddr_t phdrbase = NULL;
399 402 caddr_t bssbase = 0;
400 403 caddr_t brkbase = 0;
401 404 size_t brksize = 0;
402 405 size_t dlnsize, nsize = 0;
403 406 aux_entry_t *aux;
404 407 int error;
405 408 ssize_t resid;
406 409 int fd = -1;
407 410 intptr_t voffset;
408 411 Phdr *intphdr = NULL;
409 412 Phdr *dynamicphdr = NULL;
410 413 Phdr *stphdr = NULL;
411 414 Phdr *uphdr = NULL;
412 415 Phdr *junk = NULL;
413 416 size_t len;
414 417 size_t postfixsize = 0;
415 418 size_t i;
416 419 Phdr *phdrp;
417 420 Phdr *dataphdrp = NULL;
418 421 Phdr *dtrphdr;
419 422 Phdr *capphdr = NULL;
420 423 Cap *cap = NULL;
421 424 size_t capsize;
422 425 int hasu = 0;
423 426 int hasauxv = 0;
424 427 int hasintp = 0;
425 428 int branded = 0;
426 429 int dynuphdr = 0;
427 430
428 431 struct proc *p = ttoproc(curthread);
429 432 struct user *up = PTOU(p);
430 433 struct bigwad {
431 434 Ehdr ehdr;
432 435 aux_entry_t elfargs[__KERN_NAUXV_IMPL];
433 436 char dl_name[MAXPATHLEN];
434 437 char pathbuf[MAXPATHLEN];
435 438 struct vattr vattr;
436 439 struct execenv exenv;
437 440 } *bigwad; /* kmem_alloc this behemoth so we don't blow stack */
438 441 Ehdr *ehdrp;
439 442 uint_t nshdrs, shstrndx, nphdrs;
440 443 size_t phdrsize;
441 444 char *dlnp;
442 445 char *pathbufp;
443 446 rlim64_t limit;
444 447 rlim64_t roundlimit;
445 448
446 449 ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
447 450
448 451 bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
449 452 ehdrp = &bigwad->ehdr;
450 453 dlnp = bigwad->dl_name;
451 454 pathbufp = bigwad->pathbuf;
452 455
453 456 /*
454 457 * Obtain ELF and program header information.
455 458 */
456 459 if ((error = getelfhead(vp, CRED(), ehdrp, &nshdrs, &shstrndx,
457 460 &nphdrs)) != 0 ||
458 461 (error = getelfphdr(vp, CRED(), ehdrp, nphdrs, &phdrbase,
459 462 &phdrsize)) != 0)
460 463 goto out;
461 464
462 465 /*
463 466 * Prevent executing an ELF file that has no entry point.
464 467 */
465 468 if (ehdrp->e_entry == 0) {
466 469 uprintf("%s: Bad entry point\n", exec_file);
467 470 goto bad;
468 471 }
469 472
470 473 /*
471 474 * Put data model that we're exec-ing to into the args passed to
472 475 * exec_args(), so it will know what it is copying to on new stack.
473 476 * Now that we know whether we are exec-ing a 32-bit or 64-bit
474 477 * executable, we can set execsz with the appropriate NCARGS.
475 478 */
476 479 #ifdef _LP64
477 480 if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) {
478 481 args->to_model = DATAMODEL_ILP32;
479 482 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
480 483 } else {
481 484 args->to_model = DATAMODEL_LP64;
482 485 if (!args->stk_prot_override) {
483 486 args->stk_prot &= ~PROT_EXEC;
484 487 }
485 488 #if defined(__i386) || defined(__amd64)
486 489 args->dat_prot &= ~PROT_EXEC;
487 490 #endif
488 491 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1);
489 492 }
490 493 #else /* _LP64 */
491 494 args->to_model = DATAMODEL_ILP32;
492 495 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1);
493 496 #endif /* _LP64 */
494 497
495 498 /*
496 499 * We delay invoking the brand callback until we've figured out what
497 500 * kind of elf binary we're trying to run, 32-bit or 64-bit. We do this
498 501 * because now the brand library can just check args->to_model to see if
499 502 * the target is 32-bit or 64-bit without having to duplicate all the
500 503 * code above.
501 504 *
502 505 * We also give the brand a chance to indicate that based on the ELF
503 506 * OSABI of the target binary it should become unbranded and optionally
504 507 * indicate that it should be treated as existing in a specific prefix.
505 508 *
506 509 * Note that if a brand opts to go down this route it does not actually
507 510 * end up being debranded. In other words, future programs that exec
508 511 * will still be considered for branding unless this escape hatch is
509 512 * used. Consider the case of lx brand for example. If a user runs
510 513 * /native/usr/sbin/dtrace -c /bin/ls, the isaexec and normal executable
511 514 * of DTrace that's in /native will take this escape hatch and be run
512 515 * and interpreted using the normal system call table; however, the
513 516 * execution of a non-illumos binary in the form of /bin/ls will still
514 517 * be branded and be subject to all of the normal actions of the brand.
515 518 *
516 519 * The level checks associated with brand handling below are used to
517 520 * prevent a loop since the brand elfexec function typically comes back
518 521 * through this function. We must check <= here since the nested
519 522 * handling in the #! interpreter code will increment the level before
520 523 * calling gexec to run the final elfexec interpreter.
521 524 */
522 525 if ((level <= INTP_MAXDEPTH) && (*brand_action != EBA_NATIVE) &&
523 526 (PROC_IS_BRANDED(p)) && (BROP(p)->b_native_exec != NULL)) {
524 527 if (BROP(p)->b_native_exec(ehdrp->e_ident[EI_OSABI],
525 528 &args->brand_nroot) == B_TRUE) {
526 529 ASSERT(ehdrp->e_ident[EI_OSABI]);
527 530 *brand_action = EBA_NATIVE;
528 531 /* Add one for the trailing '/' in the path */
529 532 if (args->brand_nroot != NULL)
530 533 nsize = strlen(args->brand_nroot) + 1;
531 534 }
532 535 }
533 536
534 537 if ((level <= INTP_MAXDEPTH) &&
535 538 (*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
536 539 error = BROP(p)->b_elfexec(vp, uap, args,
537 540 idatap, level + 1, execsz, setid, exec_file, cred,
538 541 brand_action);
539 542 goto out;
540 543 }
541 544
542 545 /*
543 546 * Determine aux size now so that stack can be built
544 547 * in one shot (except actual copyout of aux image),
545 548 * determine any non-default stack protections,
546 549 * and still have this code be machine independent.
547 550 */
548 551 const uint_t hsize = ehdrp->e_phentsize;
549 552 phdrp = (Phdr *)phdrbase;
550 553 for (i = nphdrs; i > 0; i--) {
551 554 switch (phdrp->p_type) {
552 555 case PT_INTERP:
553 556 hasauxv = hasintp = 1;
554 557 break;
555 558 case PT_PHDR:
556 559 hasu = 1;
557 560 break;
558 561 case PT_SUNWSTACK:
559 562 args->stk_prot = PROT_USER;
560 563 if (phdrp->p_flags & PF_R)
561 564 args->stk_prot |= PROT_READ;
562 565 if (phdrp->p_flags & PF_W)
563 566 args->stk_prot |= PROT_WRITE;
564 567 if (phdrp->p_flags & PF_X)
565 568 args->stk_prot |= PROT_EXEC;
566 569 break;
567 570 case PT_LOAD:
568 571 dataphdrp = phdrp;
569 572 break;
570 573 case PT_SUNWCAP:
571 574 capphdr = phdrp;
572 575 break;
573 576 case PT_DYNAMIC:
574 577 dynamicphdr = phdrp;
575 578 break;
576 579 }
577 580 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
578 581 }
579 582
580 583 if (ehdrp->e_type != ET_EXEC) {
581 584 dataphdrp = NULL;
582 585 hasauxv = 1;
583 586 }
584 587
585 588 /* Copy BSS permissions to args->dat_prot */
586 589 if (dataphdrp != NULL) {
587 590 args->dat_prot = PROT_USER;
588 591 if (dataphdrp->p_flags & PF_R)
589 592 args->dat_prot |= PROT_READ;
590 593 if (dataphdrp->p_flags & PF_W)
591 594 args->dat_prot |= PROT_WRITE;
592 595 if (dataphdrp->p_flags & PF_X)
593 596 args->dat_prot |= PROT_EXEC;
594 597 }
595 598
596 599 /*
597 600 * If a auxvector will be required - reserve the space for
598 601 * it now. This may be increased by exec_args if there are
599 602 * ISA-specific types (included in __KERN_NAUXV_IMPL).
600 603 */
601 604 if (hasauxv) {
602 605 /*
603 606 * If a AUX vector is being built - the base AUX
604 607 * entries are:
605 608 *
606 609 * AT_BASE
607 610 * AT_FLAGS
608 611 * AT_PAGESZ
609 612 * AT_RANDOM (added in stk_copyout)
610 613 * AT_SUN_AUXFLAGS
611 614 * AT_SUN_HWCAP
612 615 * AT_SUN_HWCAP2
613 616 * AT_SUN_PLATFORM (added in stk_copyout)
614 617 * AT_SUN_EXECNAME (added in stk_copyout)
615 618 * AT_NULL
616 619 *
617 620 * total == 10
618 621 */
619 622 if (hasintp && hasu) {
620 623 /*
621 624 * Has PT_INTERP & PT_PHDR - the auxvectors that
622 625 * will be built are:
623 626 *
624 627 * AT_PHDR
625 628 * AT_PHENT
626 629 * AT_PHNUM
627 630 * AT_ENTRY
628 631 * AT_LDDATA
629 632 *
630 633 * total = 5
631 634 */
632 635 args->auxsize = (10 + 5) * sizeof (aux_entry_t);
633 636 } else if (hasintp) {
634 637 /*
635 638 * Has PT_INTERP but no PT_PHDR
636 639 *
637 640 * AT_EXECFD
638 641 * AT_LDDATA
639 642 *
640 643 * total = 2
641 644 */
642 645 args->auxsize = (10 + 2) * sizeof (aux_entry_t);
643 646 } else {
644 647 args->auxsize = 10 * sizeof (aux_entry_t);
645 648 }
646 649 } else {
647 650 args->auxsize = 0;
648 651 }
649 652
650 653 /*
651 654 * If this binary is using an emulator, we need to add an
652 655 * AT_SUN_EMULATOR aux entry.
653 656 */
654 657 if (args->emulator != NULL)
655 658 args->auxsize += sizeof (aux_entry_t);
656 659
657 660 /*
658 661 * If this is a native binary that's been given a modified interpreter
659 662 * root, inform it that the native system exists at that root.
660 663 */
661 664 if (args->brand_nroot != NULL) {
662 665 args->auxsize += sizeof (aux_entry_t);
663 666 }
664 667
665 668
666 669 /*
667 670 * On supported kernels (x86_64) make room in the auxv for the
668 671 * AT_SUN_COMMPAGE entry. This will go unpopulated on i86xpv systems
669 672 * which do not provide such functionality.
670 673 *
671 674 * Additionally cover the floating point information AT_SUN_FPSIZE and
672 675 * AT_SUN_FPTYPE.
673 676 */
674 677 #if defined(__amd64)
675 678 args->auxsize += 3 * sizeof (aux_entry_t);
676 679 #endif /* defined(__amd64) */
677 680
678 681 /*
679 682 * If we have user credentials, we'll supply the following entries:
680 683 * AT_SUN_UID
681 684 * AT_SUN_RUID
682 685 * AT_SUN_GID
683 686 * AT_SUN_RGID
684 687 */
685 688 if (cred != NULL) {
686 689 args->auxsize += 4 * sizeof (aux_entry_t);
687 690 }
688 691
689 692 if ((*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
690 693 branded = 1;
691 694 /*
692 695 * We will be adding 5 entries to the aux vectors. One for
693 696 * the brandname and 4 for the brand specific aux vectors.
694 697 */
695 698 args->auxsize += 5 * sizeof (aux_entry_t);
696 699 }
697 700
698 701 /* If the binary has an explicit ASLR flag, it must be honoured */
699 702 if ((dynamicphdr != NULL) && (dynamicphdr->p_filesz > 0)) {
700 703 const size_t dynfilesz = dynamicphdr->p_filesz;
701 704 const size_t dynoffset = dynamicphdr->p_offset;
702 705 Dyn *dyn, *dp;
703 706
704 707 if (dynoffset > MAXOFFSET_T ||
705 708 dynfilesz > MAXOFFSET_T ||
706 709 dynoffset + dynfilesz > MAXOFFSET_T) {
707 710 uprintf("%s: cannot read full .dynamic section\n",
708 711 exec_file);
709 712 error = EINVAL;
710 713 goto out;
711 714 }
712 715
713 716 #define DYN_STRIDE 100
714 717 for (i = 0; i < dynfilesz; i += sizeof (*dyn) * DYN_STRIDE) {
715 718 const size_t remdyns = (dynfilesz - i) / sizeof (*dyn);
716 719 const size_t ndyns = MIN(DYN_STRIDE, remdyns);
717 720 const size_t dynsize = ndyns * sizeof (*dyn);
718 721
719 722 dyn = kmem_alloc(dynsize, KM_SLEEP);
720 723
721 724 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)dyn,
722 725 (ssize_t)dynsize, (offset_t)(dynoffset + i),
723 726 UIO_SYSSPACE, 0, (rlim64_t)0,
724 727 CRED(), NULL)) != 0) {
725 728 uprintf("%s: cannot read .dynamic section\n",
726 729 exec_file);
727 730 goto out;
728 731 }
729 732
730 733 for (dp = dyn; dp < (dyn + ndyns); dp++) {
731 734 if (dp->d_tag == DT_SUNW_ASLR) {
732 735 if ((error = handle_secflag_dt(p,
733 736 DT_SUNW_ASLR,
734 737 dp->d_un.d_val)) != 0) {
735 738 uprintf("%s: error setting "
736 739 "security-flag from "
737 740 "DT_SUNW_ASLR: %d\n",
738 741 exec_file, error);
739 742 goto out;
740 743 }
741 744 }
742 745 }
743 746
744 747 kmem_free(dyn, dynsize);
745 748 }
746 749 }
747 750
748 751 /* Hardware/Software capabilities */
749 752 if (capphdr != NULL &&
750 753 (capsize = capphdr->p_filesz) > 0 &&
751 754 capsize <= 16 * sizeof (*cap)) {
752 755 const uint_t ncaps = capsize / sizeof (*cap);
753 756 Cap *cp;
754 757
755 758 cap = kmem_alloc(capsize, KM_SLEEP);
756 759 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
757 760 (ssize_t)capsize, (offset_t)capphdr->p_offset,
758 761 UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), NULL)) != 0) {
759 762 uprintf("%s: Cannot read capabilities section\n",
760 763 exec_file);
761 764 goto out;
762 765 }
763 766 for (cp = cap; cp < cap + ncaps; cp++) {
764 767 if (cp->c_tag == CA_SUNW_SF_1 &&
765 768 (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
766 769 if (args->to_model == DATAMODEL_LP64)
767 770 args->addr32 = 1;
768 771 break;
769 772 }
770 773 }
771 774 }
772 775
773 776 aux = bigwad->elfargs;
774 777 /*
775 778 * Move args to the user's stack.
776 779 * This can fill in the AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM
777 780 * aux entries.
778 781 */
779 782 if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
780 783 if (error == -1) {
781 784 error = ENOEXEC;
782 785 goto bad;
783 786 }
784 787 goto out;
785 788 }
786 789 /* we're single threaded after this point */
787 790
788 791 /*
789 792 * If this is an ET_DYN executable (shared object),
790 793 * determine its memory size so that mapelfexec() can load it.
791 794 */
792 795 if (ehdrp->e_type == ET_DYN)
793 796 len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
794 797 else
795 798 len = 0;
796 799
797 800 dtrphdr = NULL;
798 801
799 802 error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &intphdr,
800 803 &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
801 804 len, execsz, &brksize);
802 805 /*
803 806 * Our uphdr has been dynamically allocated if (and only if) its
804 807 * program header flags are clear. To avoid leaks, this must be
805 808 * checked regardless of whether mapelfexec() emitted an error.
806 809 */
807 810 dynuphdr = (uphdr != NULL && uphdr->p_flags == 0);
808 811
809 812 if (error != 0) {
810 813 goto bad;
811 814 }
812 815
813 816 if (uphdr != NULL && intphdr == NULL)
814 817 goto bad;
815 818
816 819 if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
817 820 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
818 821 goto bad;
819 822 }
820 823
821 824 if (intphdr != NULL) {
822 825 size_t len;
823 826 uintptr_t lddata;
824 827 char *p;
825 828 struct vnode *nvp;
826 829
827 830 dlnsize = intphdr->p_filesz + nsize;
828 831
829 832 /*
830 833 * Make sure none of the component pieces of dlnsize result in
831 834 * an oversized or zeroed result.
832 835 */
833 836 if (intphdr->p_filesz > MAXPATHLEN || dlnsize > MAXPATHLEN ||
834 837 dlnsize == 0 || dlnsize < intphdr->p_filesz) {
835 838 goto bad;
836 839 }
837 840
838 841 if (nsize != 0) {
839 842 bcopy(args->brand_nroot, dlnp, nsize - 1);
840 843 dlnp[nsize - 1] = '/';
841 844 }
842 845
843 846 /*
844 847 * Read in "interpreter" pathname.
845 848 */
846 849 if ((error = vn_rdwr(UIO_READ, vp, dlnp + nsize,
847 850 (ssize_t)intphdr->p_filesz, (offset_t)intphdr->p_offset,
848 851 UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
849 852 uprintf("%s: Cannot obtain interpreter pathname\n",
850 853 exec_file);
851 854 goto bad;
852 855 }
853 856
854 857 if (resid != 0 || dlnp[dlnsize - 1] != '\0')
855 858 goto bad;
856 859
857 860 /*
858 861 * Search for '$ORIGIN' token in interpreter path.
859 862 * If found, expand it.
860 863 */
861 864 for (p = dlnp; p = strchr(p, '$'); ) {
862 865 uint_t len, curlen;
863 866 char *_ptr;
864 867
865 868 if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
866 869 continue;
867 870
868 871 /*
869 872 * We don't support $ORIGIN on setid programs to close
870 873 * a potential attack vector.
871 874 */
872 875 if ((setid & EXECSETID_SETID) != 0) {
873 876 error = ENOEXEC;
874 877 goto bad;
875 878 }
876 879
877 880 curlen = 0;
878 881 len = p - dlnp - 1;
879 882 if (len) {
880 883 bcopy(dlnp, pathbufp, len);
881 884 curlen += len;
882 885 }
883 886 if (_ptr = strrchr(args->pathname, '/')) {
884 887 len = _ptr - args->pathname;
885 888 if ((curlen + len) > MAXPATHLEN)
886 889 break;
887 890
888 891 bcopy(args->pathname, &pathbufp[curlen], len);
889 892 curlen += len;
890 893 } else {
891 894 /*
892 895 * executable is a basename found in the
893 896 * current directory. So - just substitute
894 897 * '.' for ORIGIN.
895 898 */
896 899 pathbufp[curlen] = '.';
897 900 curlen++;
898 901 }
899 902 p += ORIGIN_STR_SIZE;
900 903 len = strlen(p);
901 904
902 905 if ((curlen + len) > MAXPATHLEN)
903 906 break;
904 907 bcopy(p, &pathbufp[curlen], len);
905 908 curlen += len;
906 909 pathbufp[curlen++] = '\0';
907 910 bcopy(pathbufp, dlnp, curlen);
908 911 }
909 912
910 913 /*
911 914 * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
912 915 * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
913 916 * Just in case /usr is not mounted, change it now.
914 917 */
915 918 if (strcmp(dlnp, USR_LIB_RTLD) == 0)
916 919 dlnp += 4;
917 920 error = lookupname(dlnp, UIO_SYSSPACE, FOLLOW, NULLVPP, &nvp);
918 921 if (error && dlnp != bigwad->dl_name) {
919 922 /* new kernel, old user-level */
920 923 error = lookupname(dlnp -= 4, UIO_SYSSPACE, FOLLOW,
921 924 NULLVPP, &nvp);
922 925 }
923 926 if (error) {
924 927 uprintf("%s: Cannot find %s\n", exec_file, dlnp);
925 928 goto bad;
926 929 }
927 930
928 931 /*
929 932 * Setup the "aux" vector.
930 933 */
931 934 if (uphdr) {
932 935 if (ehdrp->e_type == ET_DYN) {
933 936 /* don't use the first page */
934 937 bigwad->exenv.ex_brkbase = (caddr_t)PAGESIZE;
935 938 bigwad->exenv.ex_bssbase = (caddr_t)PAGESIZE;
936 939 } else {
937 940 bigwad->exenv.ex_bssbase = bssbase;
938 941 bigwad->exenv.ex_brkbase = brkbase;
939 942 }
940 943 bigwad->exenv.ex_brksize = brksize;
941 944 bigwad->exenv.ex_magic = elfmagic;
942 945 bigwad->exenv.ex_vp = vp;
943 946 setexecenv(&bigwad->exenv);
944 947
945 948 ADDAUX(aux, AT_PHDR, uphdr->p_vaddr + voffset)
946 949 ADDAUX(aux, AT_PHENT, ehdrp->e_phentsize)
947 950 ADDAUX(aux, AT_PHNUM, nphdrs)
948 951 ADDAUX(aux, AT_ENTRY, ehdrp->e_entry + voffset)
949 952 } else {
950 953 if ((error = execopen(&vp, &fd)) != 0) {
951 954 VN_RELE(nvp);
952 955 goto bad;
953 956 }
954 957
955 958 ADDAUX(aux, AT_EXECFD, fd)
956 959 }
957 960
958 961 if ((error = execpermissions(nvp, &bigwad->vattr, args)) != 0) {
959 962 VN_RELE(nvp);
960 963 uprintf("%s: Cannot execute %s\n", exec_file, dlnp);
961 964 goto bad;
962 965 }
963 966
964 967 /*
965 968 * Now obtain the ELF header along with the entire program
966 969 * header contained in "nvp".
967 970 */
968 971 kmem_free(phdrbase, phdrsize);
969 972 phdrbase = NULL;
970 973 if ((error = getelfhead(nvp, CRED(), ehdrp, &nshdrs,
971 974 &shstrndx, &nphdrs)) != 0 ||
972 975 (error = getelfphdr(nvp, CRED(), ehdrp, nphdrs, &phdrbase,
973 976 &phdrsize)) != 0) {
974 977 VN_RELE(nvp);
975 978 uprintf("%s: Cannot read %s\n", exec_file, dlnp);
976 979 goto bad;
977 980 }
978 981
979 982 /*
980 983 * Determine memory size of the "interpreter's" loadable
981 984 * sections. This size is then used to obtain the virtual
982 985 * address of a hole, in the user's address space, large
983 986 * enough to map the "interpreter".
984 987 */
985 988 if ((len = elfsize(ehdrp, nphdrs, phdrbase, &lddata)) == 0) {
986 989 VN_RELE(nvp);
987 990 uprintf("%s: Nothing to load in %s\n", exec_file, dlnp);
988 991 goto bad;
989 992 }
990 993
991 994 dtrphdr = NULL;
992 995
993 996 error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, NULL, &junk,
994 997 &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
995 998 execsz, NULL);
996 999
997 1000 if (error || junk != NULL) {
998 1001 VN_RELE(nvp);
999 1002 uprintf("%s: Cannot map %s\n", exec_file, dlnp);
1000 1003 goto bad;
1001 1004 }
1002 1005
1003 1006 /*
1004 1007 * We use the DTrace program header to initialize the
1005 1008 * architecture-specific user per-LWP location. The dtrace
1006 1009 * fasttrap provider requires ready access to per-LWP scratch
1007 1010 * space. We assume that there is only one such program header
1008 1011 * in the interpreter.
1009 1012 */
1010 1013 if (dtrphdr != NULL &&
1011 1014 dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
1012 1015 VN_RELE(nvp);
1013 1016 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, dlnp);
1014 1017 goto bad;
1015 1018 }
1016 1019
1017 1020 VN_RELE(nvp);
1018 1021 ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata)
1019 1022 }
1020 1023
1021 1024 if (hasauxv) {
1022 1025 int auxf = AF_SUN_HWCAPVERIFY;
1023 1026 #if defined(__amd64)
1024 1027 size_t fpsize;
1025 1028 int fptype;
1026 1029 #endif /* defined(__amd64) */
1027 1030
1028 1031 /*
1029 1032 * Note: AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM were
1030 1033 * filled in via exec_args()
1031 1034 */
1032 1035 ADDAUX(aux, AT_BASE, voffset)
1033 1036 ADDAUX(aux, AT_FLAGS, at_flags)
1034 1037 ADDAUX(aux, AT_PAGESZ, PAGESIZE)
1035 1038 /*
1036 1039 * Linker flags. (security)
1037 1040 * p_flag not yet set at this time.
1038 1041 * We rely on gexec() to provide us with the information.
1039 1042 * If the application is set-uid but this is not reflected
1040 1043 * in a mismatch between real/effective uids/gids, then
1041 1044 * don't treat this as a set-uid exec. So we care about
1042 1045 * the EXECSETID_UGIDS flag but not the ...SETID flag.
1043 1046 */
1044 1047 if ((setid &= ~EXECSETID_SETID) != 0)
1045 1048 auxf |= AF_SUN_SETUGID;
1046 1049
1047 1050 /*
1048 1051 * If we're running a native process from within a branded
1049 1052 * zone under pfexec then we clear the AF_SUN_SETUGID flag so
1050 1053 * that the native ld.so.1 is able to link with the native
1051 1054 * libraries instead of using the brand libraries that are
1052 1055 * installed in the zone. We only do this for processes
1053 1056 * which we trust because we see they are already running
1054 1057 * under pfexec (where uid != euid). This prevents a
1055 1058 * malicious user within the zone from crafting a wrapper to
1056 1059 * run native suid commands with unsecure libraries interposed.
1057 1060 */
1058 1061 if ((*brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
1059 1062 (setid &= ~EXECSETID_SETID) != 0))
1060 1063 auxf &= ~AF_SUN_SETUGID;
1061 1064
1062 1065 /*
1063 1066 * Record the user addr of the auxflags aux vector entry
1064 1067 * since brands may optionally want to manipulate this field.
1065 1068 */
1066 1069 args->auxp_auxflags =
1067 1070 (char *)((char *)args->stackend +
1068 1071 ((char *)&aux->a_type -
1069 1072 (char *)bigwad->elfargs));
1070 1073 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
1071 1074
1072 1075 /*
1073 1076 * Record information about the real and effective user and
1074 1077 * group IDs.
1075 1078 */
1076 1079 if (cred != NULL) {
1077 1080 ADDAUX(aux, AT_SUN_UID, crgetuid(cred));
1078 1081 ADDAUX(aux, AT_SUN_RUID, crgetruid(cred));
1079 1082 ADDAUX(aux, AT_SUN_GID, crgetgid(cred));
1080 1083 ADDAUX(aux, AT_SUN_RGID, crgetrgid(cred));
1081 1084 }
1082 1085
1083 1086 /*
1084 1087 * Hardware capability flag word (performance hints)
1085 1088 * Used for choosing faster library routines.
1086 1089 * (Potentially different between 32-bit and 64-bit ABIs)
1087 1090 */
1088 1091 #if defined(_LP64)
1089 1092 if (args->to_model == DATAMODEL_NATIVE) {
1090 1093 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
1091 1094 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
1092 1095 } else {
1093 1096 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
1094 1097 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
1095 1098 }
1096 1099 #else
1097 1100 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
1098 1101 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
1099 1102 #endif
1100 1103 if (branded) {
1101 1104 /*
1102 1105 * Reserve space for the brand-private aux vectors,
1103 1106 * and record the user addr of that space.
1104 1107 */
1105 1108 args->auxp_brand =
1106 1109 (char *)((char *)args->stackend +
1107 1110 ((char *)&aux->a_type -
1108 1111 (char *)bigwad->elfargs));
1109 1112 ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
1110 1113 ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
1111 1114 ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
1112 1115 ADDAUX(aux, AT_SUN_BRAND_AUX4, 0)
1113 1116 }
1114 1117
1115 1118 /*
1116 1119 * Add the comm page auxv entry, mapping it in if needed. Also
1117 1120 * take care of the FPU entries.
1118 1121 */
1119 1122 #if defined(__amd64)
1120 1123 if (args->commpage != (uintptr_t)NULL ||
1121 1124 (args->commpage = (uintptr_t)comm_page_mapin()) !=
1122 1125 (uintptr_t)NULL) {
1123 1126 ADDAUX(aux, AT_SUN_COMMPAGE, args->commpage)
1124 1127 } else {
1125 1128 /*
1126 1129 * If the comm page cannot be mapped, pad out the auxv
1127 1130 * to satisfy later size checks.
1128 1131 */
1129 1132 ADDAUX(aux, AT_NULL, 0)
1130 1133 }
1131 1134
1132 1135 fptype = AT_386_FPINFO_NONE;
1133 1136 fpu_auxv_info(&fptype, &fpsize);
1134 1137 if (fptype != AT_386_FPINFO_NONE) {
1135 1138 ADDAUX(aux, AT_SUN_FPTYPE, fptype)
1136 1139 ADDAUX(aux, AT_SUN_FPSIZE, fpsize)
1137 1140 } else {
1138 1141 ADDAUX(aux, AT_NULL, 0)
1139 1142 ADDAUX(aux, AT_NULL, 0)
1140 1143 }
1141 1144 #endif /* defined(__amd64) */
1142 1145
1143 1146 ADDAUX(aux, AT_NULL, 0)
1144 1147 postfixsize = (uintptr_t)aux - (uintptr_t)bigwad->elfargs;
1145 1148
1146 1149 /*
1147 1150 * We make assumptions above when we determine how many aux
1148 1151 * vector entries we will be adding. However, if we have an
1149 1152 * invalid elf file, it is possible that mapelfexec might
1150 1153 * behave differently (but not return an error), in which case
1151 1154 * the number of aux entries we actually add will be different.
1152 1155 * We detect that now and error out.
1153 1156 */
1154 1157 if (postfixsize != args->auxsize) {
1155 1158 DTRACE_PROBE2(elfexec_badaux, size_t, postfixsize,
1156 1159 size_t, args->auxsize);
1157 1160 goto bad;
1158 1161 }
1159 1162 ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
1160 1163 }
1161 1164
1162 1165 /*
1163 1166 * For the 64-bit kernel, the limit is big enough that rounding it up
1164 1167 * to a page can overflow the 64-bit limit, so we check for btopr()
1165 1168 * overflowing here by comparing it with the unrounded limit in pages.
1166 1169 * If it hasn't overflowed, compare the exec size with the rounded up
1167 1170 * limit in pages. Otherwise, just compare with the unrounded limit.
1168 1171 */
1169 1172 limit = btop(p->p_vmem_ctl);
1170 1173 roundlimit = btopr(p->p_vmem_ctl);
1171 1174 if ((roundlimit > limit && *execsz > roundlimit) ||
1172 1175 (roundlimit < limit && *execsz > limit)) {
1173 1176 mutex_enter(&p->p_lock);
1174 1177 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
1175 1178 RCA_SAFE);
1176 1179 mutex_exit(&p->p_lock);
1177 1180 error = ENOMEM;
1178 1181 goto bad;
1179 1182 }
1180 1183
1181 1184 bzero(up->u_auxv, sizeof (up->u_auxv));
1182 1185 up->u_commpagep = args->commpage;
1183 1186 if (postfixsize) {
1184 1187 size_t num_auxv;
1185 1188
1186 1189 /*
1187 1190 * Copy the aux vector to the user stack.
1188 1191 */
1189 1192 error = execpoststack(args, bigwad->elfargs, postfixsize);
1190 1193 if (error)
1191 1194 goto bad;
1192 1195
1193 1196 /*
1194 1197 * Copy auxv to the process's user structure for use by /proc.
1195 1198 * If this is a branded process, the brand's exec routine will
1196 1199 * copy it's private entries to the user structure later. It
1197 1200 * relies on the fact that the blank entries are at the end.
1198 1201 */
1199 1202 num_auxv = postfixsize / sizeof (aux_entry_t);
1200 1203 ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
1201 1204 aux = bigwad->elfargs;
1202 1205 for (i = 0; i < num_auxv; i++) {
1203 1206 up->u_auxv[i].a_type = aux[i].a_type;
1204 1207 up->u_auxv[i].a_un.a_val = (aux_val_t)aux[i].a_un.a_val;
1205 1208 }
1206 1209 }
1207 1210
1208 1211 /*
1209 1212 * Pass back the starting address so we can set the program counter.
1210 1213 */
1211 1214 args->entry = (uintptr_t)(ehdrp->e_entry + voffset);
1212 1215
1213 1216 if (!uphdr) {
1214 1217 if (ehdrp->e_type == ET_DYN) {
1215 1218 /*
1216 1219 * If we are executing a shared library which doesn't
1217 1220 * have a interpreter (probably ld.so.1) then
1218 1221 * we don't set the brkbase now. Instead we
1219 1222 * delay it's setting until the first call
1220 1223 * via grow.c::brk(). This permits ld.so.1 to
1221 1224 * initialize brkbase to the tail of the executable it
1222 1225 * loads (which is where it needs to be).
1223 1226 */
1224 1227 bigwad->exenv.ex_brkbase = (caddr_t)0;
1225 1228 bigwad->exenv.ex_bssbase = (caddr_t)0;
1226 1229 bigwad->exenv.ex_brksize = 0;
1227 1230 } else {
1228 1231 bigwad->exenv.ex_brkbase = brkbase;
1229 1232 bigwad->exenv.ex_bssbase = bssbase;
1230 1233 bigwad->exenv.ex_brksize = brksize;
1231 1234 }
1232 1235 bigwad->exenv.ex_magic = elfmagic;
1233 1236 bigwad->exenv.ex_vp = vp;
1234 1237 setexecenv(&bigwad->exenv);
1235 1238 }
1236 1239
1237 1240 ASSERT(error == 0);
1238 1241 goto out;
1239 1242
1240 1243 bad:
1241 1244 if (fd != -1) /* did we open the a.out yet */
1242 1245 (void) execclose(fd);
1243 1246
1244 1247 psignal(p, SIGKILL);
1245 1248
1246 1249 if (error == 0)
1247 1250 error = ENOEXEC;
1248 1251 out:
1249 1252 if (dynuphdr)
1250 1253 kmem_free(uphdr, sizeof (Phdr));
1251 1254 if (phdrbase != NULL)
1252 1255 kmem_free(phdrbase, phdrsize);
1253 1256 if (cap != NULL)
1254 1257 kmem_free(cap, capsize);
1255 1258 kmem_free(bigwad, sizeof (struct bigwad));
1256 1259 return (error);
1257 1260 }
1258 1261
1259 1262 /*
1260 1263 * Compute the memory size requirement for the ELF file.
1261 1264 */
1262 1265 static size_t
1263 1266 elfsize(const Ehdr *ehdrp, uint_t nphdrs, const caddr_t phdrbase,
1264 1267 uintptr_t *lddata)
1265 1268 {
1266 1269 const Phdr *phdrp = (Phdr *)phdrbase;
1267 1270 const uint_t hsize = ehdrp->e_phentsize;
1268 1271 boolean_t dfirst = B_TRUE;
1269 1272 uintptr_t loaddr = UINTPTR_MAX;
1270 1273 uintptr_t hiaddr = 0;
1271 1274 uint_t i;
1272 1275
1273 1276 for (i = nphdrs; i > 0; i--) {
1274 1277 if (phdrp->p_type == PT_LOAD) {
1275 1278 const uintptr_t lo = phdrp->p_vaddr;
1276 1279 const uintptr_t hi = lo + phdrp->p_memsz;
1277 1280
1278 1281 loaddr = MIN(lo, loaddr);
1279 1282 hiaddr = MAX(hi, hiaddr);
1280 1283
1281 1284 /*
1282 1285 * save the address of the first data segment
1283 1286 * of a object - used for the AT_SUNW_LDDATA
1284 1287 * aux entry.
1285 1288 */
1286 1289 if ((lddata != NULL) && dfirst &&
1287 1290 (phdrp->p_flags & PF_W)) {
1288 1291 *lddata = lo;
1289 1292 dfirst = B_FALSE;
1290 1293 }
1291 1294 }
1292 1295 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
1293 1296 }
1294 1297
1295 1298 if (hiaddr <= loaddr) {
1296 1299 /* No non-zero PT_LOAD segment found */
1297 1300 return (0);
1298 1301 }
1299 1302
1300 1303 return (roundup(hiaddr - (loaddr & PAGEMASK), PAGESIZE));
1301 1304 }
1302 1305
1303 1306 /*
1304 1307 * Read in the ELF header and program header table.
1305 1308 * SUSV3 requires:
1306 1309 * ENOEXEC File format is not recognized
1307 1310 * EINVAL Format recognized but execution not supported
1308 1311 */
1309 1312 static int
1310 1313 getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, uint_t *nshdrs,
1311 1314 uint_t *shstrndx, uint_t *nphdrs)
1312 1315 {
1313 1316 int error;
1314 1317 ssize_t resid;
1315 1318
1316 1319 /*
1317 1320 * We got here by the first two bytes in ident,
1318 1321 * now read the entire ELF header.
1319 1322 */
1320 1323 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr, sizeof (Ehdr),
1321 1324 (offset_t)0, UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid)) != 0) {
1322 1325 return (error);
1323 1326 }
1324 1327
1325 1328 /*
1326 1329 * Since a separate version is compiled for handling 32-bit and
1327 1330 * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
1328 1331 * doesn't need to be able to deal with 32-bit ELF files.
1329 1332 */
1330 1333 if (resid != 0 ||
1331 1334 ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
1332 1335 ehdr->e_ident[EI_MAG3] != ELFMAG3) {
1333 1336 return (ENOEXEC);
1334 1337 }
1335 1338
1336 1339 if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ||
1337 1340 #if defined(_ILP32) || defined(_ELF32_COMPAT)
1338 1341 ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
1339 1342 #else
1340 1343 ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
1341 1344 #endif
1342 1345 !elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine,
1343 1346 ehdr->e_flags)) {
1344 1347 return (EINVAL);
1345 1348 }
1346 1349
1347 1350 *nshdrs = ehdr->e_shnum;
1348 1351 *shstrndx = ehdr->e_shstrndx;
1349 1352 *nphdrs = ehdr->e_phnum;
1350 1353
1351 1354 /*
1352 1355 * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
1353 1356 * to read in the section header at index zero to access the true
1354 1357 * values for those fields.
1355 1358 */
1356 1359 if ((*nshdrs == 0 && ehdr->e_shoff != 0) ||
1357 1360 *shstrndx == SHN_XINDEX || *nphdrs == PN_XNUM) {
1358 1361 Shdr shdr;
1359 1362
1360 1363 if (ehdr->e_shoff == 0)
1361 1364 return (EINVAL);
1362 1365
1363 1366 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr,
1364 1367 sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0,
1365 1368 (rlim64_t)0, credp, NULL)) != 0)
1366 1369 return (error);
1367 1370
1368 1371 if (*nshdrs == 0)
1369 1372 *nshdrs = shdr.sh_size;
1370 1373 if (*shstrndx == SHN_XINDEX)
1371 1374 *shstrndx = shdr.sh_link;
1372 1375 if (*nphdrs == PN_XNUM && shdr.sh_info != 0)
1373 1376 *nphdrs = shdr.sh_info;
1374 1377 }
1375 1378
1376 1379 return (0);
1377 1380 }
1378 1381
1379 1382 /*
1380 1383 * We use members through p_flags on 32-bit files and p_memsz on 64-bit files,
1381 1384 * so e_phentsize must be at least large enough to include those members.
1382 1385 */
1383 1386 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1384 1387 #define MINPHENTSZ (offsetof(Phdr, p_flags) + \
1385 1388 sizeof (((Phdr *)NULL)->p_flags))
1386 1389 #else
1387 1390 #define MINPHENTSZ (offsetof(Phdr, p_memsz) + \
1388 1391 sizeof (((Phdr *)NULL)->p_memsz))
1389 1392 #endif
1390 1393
1391 1394 static int
1392 1395 getelfphdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, uint_t nphdrs,
1393 1396 caddr_t *phbasep, size_t *phsizep)
1394 1397 {
1395 1398 int err;
1396 1399
1397 1400 /*
1398 1401 * Ensure that e_phentsize is large enough for required fields to be
1399 1402 * accessible and will maintain 8-byte alignment.
1400 1403 */
1401 1404 if (ehdr->e_phentsize < MINPHENTSZ || (ehdr->e_phentsize & 3))
1402 1405 return (EINVAL);
1403 1406
1404 1407 *phsizep = nphdrs * ehdr->e_phentsize;
1405 1408
1406 1409 if (*phsizep > sizeof (Phdr) * elf_nphdr_max) {
1407 1410 if ((*phbasep = kmem_alloc(*phsizep, KM_NOSLEEP)) == NULL)
1408 1411 return (ENOMEM);
1409 1412 } else {
1410 1413 *phbasep = kmem_alloc(*phsizep, KM_SLEEP);
1411 1414 }
1412 1415
1413 1416 if ((err = vn_rdwr(UIO_READ, vp, *phbasep, (ssize_t)*phsizep,
1414 1417 (offset_t)ehdr->e_phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1415 1418 credp, NULL)) != 0) {
1416 1419 kmem_free(*phbasep, *phsizep);
1417 1420 *phbasep = NULL;
1418 1421 return (err);
1419 1422 }
1420 1423
1421 1424 return (0);
1422 1425 }
1423 1426
1424 1427 #define MINSHDRSZ (offsetof(Shdr, sh_entsize) + \
1425 1428 sizeof (((Shdr *)NULL)->sh_entsize))
1426 1429
1427 1430 static int
1428 1431 getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, uint_t nshdrs,
1429 1432 uint_t shstrndx, caddr_t *shbasep, size_t *shsizep, char **shstrbasep,
1430 1433 size_t *shstrsizep)
1431 1434 {
1432 1435 int err;
1433 1436 Shdr *shdr;
1434 1437
1435 1438 /*
1436 1439 * Since we're going to be using e_shentsize to iterate down the
1437 1440 * array of section headers, it must be 8-byte aligned or else
1438 1441 * a we might cause a misaligned access. We use all members through
1439 1442 * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
1440 1443 * must be at least large enough to include that member. The index
1441 1444 * of the string table section must also be valid.
1442 1445 */
1443 1446 if (ehdr->e_shentsize < MINSHDRSZ || (ehdr->e_shentsize & 3) ||
1444 1447 nshdrs == 0 || shstrndx >= nshdrs)
1445 1448 return (EINVAL);
1446 1449
1447 1450 *shsizep = nshdrs * ehdr->e_shentsize;
1448 1451
1449 1452 if (*shsizep > sizeof (Shdr) * elf_nshdr_max) {
1450 1453 if ((*shbasep = kmem_alloc(*shsizep, KM_NOSLEEP)) == NULL)
1451 1454 return (ENOMEM);
1452 1455 } else {
1453 1456 *shbasep = kmem_alloc(*shsizep, KM_SLEEP);
1454 1457 }
1455 1458
1456 1459 if ((err = vn_rdwr(UIO_READ, vp, *shbasep, (ssize_t)*shsizep,
1457 1460 (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1458 1461 credp, NULL)) != 0) {
1459 1462 kmem_free(*shbasep, *shsizep);
1460 1463 return (err);
1461 1464 }
1462 1465
1463 1466 /*
1464 1467 * Grab the section string table. Walking through the shdrs is
1465 1468 * pointless if their names cannot be interrogated.
1466 1469 */
1467 1470 shdr = (Shdr *)(*shbasep + shstrndx * ehdr->e_shentsize);
1468 1471 if ((*shstrsizep = shdr->sh_size) == 0) {
1469 1472 kmem_free(*shbasep, *shsizep);
1470 1473 return (EINVAL);
1471 1474 }
1472 1475
1473 1476 if (*shstrsizep > elf_shstrtab_max) {
1474 1477 if ((*shstrbasep = kmem_alloc(*shstrsizep,
1475 1478 KM_NOSLEEP)) == NULL) {
1476 1479 kmem_free(*shbasep, *shsizep);
1477 1480 return (ENOMEM);
1478 1481 }
1479 1482 } else {
1480 1483 *shstrbasep = kmem_alloc(*shstrsizep, KM_SLEEP);
1481 1484 }
1482 1485
1483 1486 if ((err = vn_rdwr(UIO_READ, vp, *shstrbasep, (ssize_t)*shstrsizep,
1484 1487 (offset_t)shdr->sh_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
1485 1488 credp, NULL)) != 0) {
1486 1489 kmem_free(*shbasep, *shsizep);
1487 1490 kmem_free(*shstrbasep, *shstrsizep);
1488 1491 return (err);
1489 1492 }
1490 1493
1491 1494 /*
1492 1495 * Make sure the strtab is null-terminated to make sure we
1493 1496 * don't run off the end of the table.
1494 1497 */
1495 1498 (*shstrbasep)[*shstrsizep - 1] = '\0';
1496 1499
1497 1500 return (0);
1498 1501 }
1499 1502
1500 1503
1501 1504 int
1502 1505 elfreadhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, uint_t *nphdrs,
1503 1506 caddr_t *phbasep, size_t *phsizep)
1504 1507 {
1505 1508 int error;
1506 1509 uint_t nshdrs, shstrndx;
1507 1510
1508 1511 if ((error = getelfhead(vp, credp, ehdrp, &nshdrs, &shstrndx,
1509 1512 nphdrs)) != 0 ||
1510 1513 (error = getelfphdr(vp, credp, ehdrp, *nphdrs, phbasep,
1511 1514 phsizep)) != 0) {
1512 1515 return (error);
1513 1516 }
1514 1517 return (0);
1515 1518 }
1516 1519
1517 1520
1518 1521 static int
1519 1522 mapelfexec(
1520 1523 vnode_t *vp,
1521 1524 Ehdr *ehdr,
1522 1525 uint_t nphdrs,
1523 1526 caddr_t phdrbase,
1524 1527 Phdr **uphdr,
1525 1528 Phdr **intphdr,
1526 1529 Phdr **stphdr,
1527 1530 Phdr **dtphdr,
1528 1531 Phdr *dataphdrp,
1529 1532 caddr_t *bssbase,
1530 1533 caddr_t *brkbase,
1531 1534 intptr_t *voffset,
1532 1535 uintptr_t *minaddrp,
1533 1536 size_t len,
1534 1537 size_t *execsz,
1535 1538 size_t *brksize)
1536 1539 {
1537 1540 Phdr *phdr;
1538 1541 int error, page, prot, lastprot = 0;
1539 1542 caddr_t addr = NULL;
1540 1543 caddr_t minaddr = (caddr_t)UINTPTR_MAX;
1541 1544 uint_t i;
1542 1545 size_t zfodsz, memsz;
1543 1546 boolean_t ptload = B_FALSE;
1544 1547 off_t offset;
1545 1548 const uint_t hsize = ehdr->e_phentsize;
1546 1549 uintptr_t lastaddr = 0;
1547 1550 extern int use_brk_lpg;
1548 1551
1549 1552 if (ehdr->e_type == ET_DYN) {
1550 1553 caddr_t vaddr;
1551 1554 secflagset_t flags = 0;
1552 1555 /*
1553 1556 * Obtain the virtual address of a hole in the
1554 1557 * address space to map the "interpreter".
1555 1558 */
1556 1559 if (secflag_enabled(curproc, PROC_SEC_ASLR))
1557 1560 flags |= _MAP_RANDOMIZE;
1558 1561
1559 1562 map_addr(&addr, len, (offset_t)0, 1, flags);
1560 1563 if (addr == NULL)
1561 1564 return (ENOMEM);
1562 1565
1563 1566 /*
1564 1567 * Despite the fact that mmapobj(2) refuses to load them, we
1565 1568 * need to support executing ET_DYN objects that have a
1566 1569 * non-NULL p_vaddr. When found in the wild, these objects
1567 1570 * are likely to be due to an old (and largely obviated) Linux
1568 1571 * facility, prelink(8), that rewrites shared objects to
1569 1572 * prefer specific (disjoint) virtual address ranges. (Yes,
1570 1573 * this is putatively for performance -- and yes, it has
1571 1574 * limited applicability, many edge conditions and grisly
1572 1575 * failure modes; even for Linux, it's insane.) As ELF
1573 1576 * mandates that the PT_LOAD segments be in p_vaddr order, we
1574 1577 * find the lowest p_vaddr by finding the first PT_LOAD
1575 1578 * segment.
1576 1579 */
1577 1580 phdr = (Phdr *)phdrbase;
1578 1581 for (i = nphdrs; i > 0; i--) {
1579 1582 if (phdr->p_type == PT_LOAD) {
1580 1583 addr = (caddr_t)(uintptr_t)phdr->p_vaddr;
1581 1584 break;
1582 1585 }
1583 1586 phdr = (Phdr *)((caddr_t)phdr + hsize);
1584 1587 }
1585 1588
1586 1589 /*
1587 1590 * We have a non-zero p_vaddr in the first PT_LOAD segment --
1588 1591 * presumably because we're directly executing a prelink(8)'d
1589 1592 * ld-linux.so. While we could correctly execute such an
1590 1593 * object without locating it at its desired p_vaddr (it is,
1591 1594 * after all, still relocatable), our inner antiquarian
1592 1595 * derives a perverse pleasure in accommodating the steampunk
1593 1596 * prelink(8) contraption -- goggles on!
1594 1597 */
1595 1598 if ((vaddr = addr) != NULL) {
1596 1599 if (as_gap(curproc->p_as, len, &addr, &len,
1597 1600 AH_LO, NULL) == -1 || addr != vaddr) {
1598 1601 addr = NULL;
1599 1602 }
1600 1603 }
1601 1604
1602 1605 if (addr == NULL) {
1603 1606 /*
1604 1607 * We either have a NULL p_vaddr (the common case, by
1605 1608 * many orders of magnitude) or we have a non-NULL
1606 1609 * p_vaddr and we were unable to obtain the specified
1607 1610 * VA range (presumably because it's an illegal
1608 1611 * address). Either way, obtain an address in which
1609 1612 * to map the interpreter.
1610 1613 */
1611 1614 map_addr(&addr, len, (offset_t)0, 1, 0);
1612 1615 if (addr == NULL)
1613 1616 return (ENOMEM);
1614 1617 }
1615 1618
1616 1619 /*
1617 1620 * Our voffset is the difference between where we landed and
1618 1621 * where we wanted to be.
1619 1622 */
1620 1623 *voffset = (uintptr_t)addr - (uintptr_t)vaddr;
1621 1624 } else {
1622 1625 *voffset = 0;
1623 1626 }
1624 1627
1625 1628 phdr = (Phdr *)phdrbase;
1626 1629 for (i = nphdrs; i > 0; i--) {
1627 1630 switch (phdr->p_type) {
1628 1631 case PT_LOAD:
1629 1632 ptload = B_TRUE;
1630 1633 prot = PROT_USER;
1631 1634 if (phdr->p_flags & PF_R)
1632 1635 prot |= PROT_READ;
1633 1636 if (phdr->p_flags & PF_W)
1634 1637 prot |= PROT_WRITE;
1635 1638 if (phdr->p_flags & PF_X)
1636 1639 prot |= PROT_EXEC;
1637 1640
1638 1641 addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
1639 1642
1640 1643 if ((*intphdr != NULL) && uphdr != NULL &&
1641 1644 (*uphdr == NULL)) {
1642 1645 /*
1643 1646 * The PT_PHDR program header is, strictly
1644 1647 * speaking, optional. If we find that this
1645 1648 * is missing, we will determine the location
1646 1649 * of the program headers based on the address
1647 1650 * of the lowest PT_LOAD segment (namely, this
1648 1651 * one): we subtract the p_offset to get to
1649 1652 * the ELF header and then add back the program
1650 1653 * header offset to get to the program headers.
1651 1654 * We then cons up a Phdr that corresponds to
1652 1655 * the (missing) PT_PHDR, setting the flags
1653 1656 * to 0 to denote that this is artificial and
1654 1657 * should (must) be freed by the caller.
1655 1658 */
1656 1659 Phdr *cons;
1657 1660
1658 1661 cons = kmem_zalloc(sizeof (Phdr), KM_SLEEP);
1659 1662
1660 1663 cons->p_flags = 0;
1661 1664 cons->p_type = PT_PHDR;
1662 1665 cons->p_vaddr = ((uintptr_t)addr -
1663 1666 phdr->p_offset) + ehdr->e_phoff;
1664 1667
1665 1668 *uphdr = cons;
1666 1669 }
1667 1670
1668 1671 /*
1669 1672 * The ELF spec dictates that p_filesz may not be
1670 1673 * larger than p_memsz in PT_LOAD segments.
1671 1674 */
1672 1675 if (phdr->p_filesz > phdr->p_memsz) {
1673 1676 error = EINVAL;
1674 1677 goto bad;
1675 1678 }
1676 1679
1677 1680 /*
1678 1681 * Keep track of the segment with the lowest starting
1679 1682 * address.
1680 1683 */
1681 1684 if (addr < minaddr)
1682 1685 minaddr = addr;
1683 1686
1684 1687 /*
1685 1688 * Segments need not correspond to page boundaries:
1686 1689 * they are permitted to share a page. If two PT_LOAD
1687 1690 * segments share the same page, and the permissions
1688 1691 * of the segments differ, the behavior is historically
1689 1692 * that the permissions of the latter segment are used
1690 1693 * for the page that the two segments share. This is
1691 1694 * also historically a non-issue: binaries generated
1692 1695 * by most anything will make sure that two PT_LOAD
1693 1696 * segments with differing permissions don't actually
1694 1697 * share any pages. However, there exist some crazy
1695 1698 * things out there (including at least an obscure
1696 1699 * Portuguese teaching language called G-Portugol) that
1697 1700 * actually do the wrong thing and expect it to work:
1698 1701 * they have a segment with execute permission share
1699 1702 * a page with a subsequent segment that does not
1700 1703 * have execute permissions and expect the resulting
1701 1704 * shared page to in fact be executable. To accommodate
1702 1705 * such broken link editors, we take advantage of a
1703 1706 * latitude explicitly granted to the loader: it is
1704 1707 * permitted to make _any_ PT_LOAD segment executable
1705 1708 * (provided that it is readable or writable). If we
1706 1709 * see that we're sharing a page and that the previous
1707 1710 * page was executable, we will add execute permissions
1708 1711 * to our segment.
1709 1712 */
1710 1713 if (btop(lastaddr) == btop((uintptr_t)addr) &&
1711 1714 (phdr->p_flags & (PF_R | PF_W)) &&
1712 1715 (lastprot & PROT_EXEC)) {
1713 1716 prot |= PROT_EXEC;
1714 1717 }
1715 1718
1716 1719 lastaddr = (uintptr_t)addr + phdr->p_filesz;
1717 1720 lastprot = prot;
1718 1721
1719 1722 zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
1720 1723
1721 1724 offset = phdr->p_offset;
1722 1725 if (((uintptr_t)offset & PAGEOFFSET) ==
1723 1726 ((uintptr_t)addr & PAGEOFFSET) &&
1724 1727 (!(vp->v_flag & VNOMAP))) {
1725 1728 page = 1;
1726 1729 } else {
1727 1730 page = 0;
1728 1731 }
1729 1732
1730 1733 /*
1731 1734 * Set the heap pagesize for OOB when the bss size
1732 1735 * is known and use_brk_lpg is not 0.
1733 1736 */
1734 1737 if (brksize != NULL && use_brk_lpg &&
1735 1738 zfodsz != 0 && phdr == dataphdrp &&
1736 1739 (prot & PROT_WRITE)) {
1737 1740 const size_t tlen = P2NPHASE((uintptr_t)addr +
1738 1741 phdr->p_filesz, PAGESIZE);
1739 1742
1740 1743 if (zfodsz > tlen) {
1741 1744 const caddr_t taddr = addr +
1742 1745 phdr->p_filesz + tlen;
1743 1746
1744 1747 /*
1745 1748 * Since a hole in the AS large enough
1746 1749 * for this object as calculated by
1747 1750 * elfsize() is available, we do not
1748 1751 * need to fear overflow for 'taddr'.
1749 1752 */
1750 1753 curproc->p_brkpageszc =
1751 1754 page_szc(map_pgsz(MAPPGSZ_HEAP,
1752 1755 curproc, taddr, zfodsz - tlen, 0));
1753 1756 }
1754 1757 }
1755 1758
1756 1759 if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
1757 1760 (prot & PROT_WRITE)) {
1758 1761 uint_t szc = curproc->p_brkpageszc;
1759 1762 size_t pgsz = page_get_pagesize(szc);
1760 1763 caddr_t ebss = addr + phdr->p_memsz;
1761 1764 /*
1762 1765 * If we need extra space to keep the BSS an
1763 1766 * integral number of pages in size, some of
1764 1767 * that space may fall beyond p_brkbase, so we
1765 1768 * need to set p_brksize to account for it
1766 1769 * being (logically) part of the brk.
1767 1770 */
1768 1771 size_t extra_zfodsz;
1769 1772
1770 1773 ASSERT(pgsz > PAGESIZE);
1771 1774
1772 1775 extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
1773 1776
1774 1777 if (error = execmap(vp, addr, phdr->p_filesz,
1775 1778 zfodsz + extra_zfodsz, phdr->p_offset,
1776 1779 prot, page, szc))
1777 1780 goto bad;
1778 1781 if (brksize != NULL)
1779 1782 *brksize = extra_zfodsz;
1780 1783 } else {
1781 1784 if (error = execmap(vp, addr, phdr->p_filesz,
1782 1785 zfodsz, phdr->p_offset, prot, page, 0))
1783 1786 goto bad;
1784 1787 }
1785 1788
1786 1789 if (bssbase != NULL && addr >= *bssbase &&
1787 1790 phdr == dataphdrp) {
1788 1791 *bssbase = addr + phdr->p_filesz;
1789 1792 }
1790 1793 if (brkbase != NULL && addr >= *brkbase) {
1791 1794 *brkbase = addr + phdr->p_memsz;
1792 1795 }
1793 1796
1794 1797 memsz = btopr(phdr->p_memsz);
1795 1798 if ((*execsz + memsz) < *execsz) {
1796 1799 error = ENOMEM;
1797 1800 goto bad;
1798 1801 }
1799 1802 *execsz += memsz;
1800 1803 break;
1801 1804
1802 1805 case PT_INTERP:
1803 1806 /*
1804 1807 * The ELF specification is unequivocal about the
1805 1808 * PT_INTERP program header with respect to any PT_LOAD
1806 1809 * program header: "If it is present, it must precede
1807 1810 * any loadable segment entry." Linux, however, makes
1808 1811 * no attempt to enforce this -- which has allowed some
1809 1812 * binary editing tools to get away with generating
1810 1813 * invalid ELF binaries in the respect that PT_INTERP
1811 1814 * occurs after the first PT_LOAD program header. This
1812 1815 * is unfortunate (and of course, disappointing) but
1813 1816 * it's no worse than that: there is no reason that we
1814 1817 * can't process the PT_INTERP entry (if present) after
1815 1818 * one or more PT_LOAD entries. We therefore
1816 1819 * deliberately do not check ptload here and always
1817 1820 * store dyphdr to be the PT_INTERP program header.
1818 1821 */
1819 1822 *intphdr = phdr;
1820 1823 break;
1821 1824
1822 1825 case PT_SHLIB:
1823 1826 *stphdr = phdr;
1824 1827 break;
1825 1828
1826 1829 case PT_PHDR:
1827 1830 if (ptload || phdr->p_flags == 0)
1828 1831 goto bad;
1829 1832
1830 1833 if (uphdr != NULL)
1831 1834 *uphdr = phdr;
1832 1835
1833 1836 break;
1834 1837
1835 1838 case PT_NULL:
1836 1839 case PT_DYNAMIC:
1837 1840 case PT_NOTE:
1838 1841 break;
1839 1842
1840 1843 case PT_SUNWDTRACE:
1841 1844 if (dtphdr != NULL)
1842 1845 *dtphdr = phdr;
1843 1846 break;
1844 1847
1845 1848 default:
1846 1849 break;
1847 1850 }
1848 1851 phdr = (Phdr *)((caddr_t)phdr + hsize);
1849 1852 }
1850 1853
1851 1854 if (minaddrp != NULL) {
1852 1855 ASSERT(minaddr != (caddr_t)UINTPTR_MAX);
1853 1856 *minaddrp = (uintptr_t)minaddr;
1854 1857 }
1855 1858
1856 1859 if (brkbase != NULL && secflag_enabled(curproc, PROC_SEC_ASLR)) {
1857 1860 size_t off;
1858 1861 uintptr_t base = (uintptr_t)*brkbase;
1859 1862 uintptr_t oend = base + *brksize;
1860 1863
1861 1864 ASSERT(ISP2(aslr_max_brk_skew));
1862 1865
1863 1866 (void) random_get_pseudo_bytes((uint8_t *)&off, sizeof (off));
1864 1867 base += P2PHASE(off, aslr_max_brk_skew);
1865 1868 base = P2ROUNDUP(base, PAGESIZE);
1866 1869 *brkbase = (caddr_t)base;
1867 1870 /*
1868 1871 * Above, we set *brksize to account for the possibility we
1869 1872 * had to grow the 'brk' in padding out the BSS to a page
1870 1873 * boundary.
1871 1874 *
1872 1875 * We now need to adjust that based on where we now are
1873 1876 * actually putting the brk.
1874 1877 */
1875 1878 if (oend > base)
1876 1879 *brksize = oend - base;
1877 1880 else
1878 1881 *brksize = 0;
1879 1882 }
1880 1883
1881 1884 return (0);
1882 1885 bad:
1883 1886 if (error == 0)
1884 1887 error = EINVAL;
1885 1888 return (error);
1886 1889 }
1887 1890
1888 1891 int
1889 1892 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1890 1893 rlim64_t rlimit, cred_t *credp)
1891 1894 {
1892 1895 Note note;
1893 1896 int error;
1894 1897
1895 1898 bzero(¬e, sizeof (note));
1896 1899 bcopy("CORE", note.name, 4);
1897 1900 note.nhdr.n_type = type;
1898 1901 /*
1899 1902 * The System V ABI states that n_namesz must be the length of the
1900 1903 * string that follows the Nhdr structure including the terminating
1901 1904 * null. The ABI also specifies that sufficient padding should be
1902 1905 * included so that the description that follows the name string
1903 1906 * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
1904 1907 * respectively. However, since this change was not made correctly
1905 1908 * at the time of the 64-bit port, both 32- and 64-bit binaries
1906 1909 * descriptions are only guaranteed to begin on a 4-byte boundary.
1907 1910 */
1908 1911 note.nhdr.n_namesz = 5;
1909 1912 note.nhdr.n_descsz = roundup(descsz, sizeof (Word));
1910 1913
1911 1914 if (error = core_write(vp, UIO_SYSSPACE, *offsetp, ¬e,
1912 1915 sizeof (note), rlimit, credp))
1913 1916 return (error);
1914 1917
1915 1918 *offsetp += sizeof (note);
1916 1919
1917 1920 if (error = core_write(vp, UIO_SYSSPACE, *offsetp, desc,
1918 1921 note.nhdr.n_descsz, rlimit, credp))
1919 1922 return (error);
1920 1923
1921 1924 *offsetp += note.nhdr.n_descsz;
1922 1925 return (0);
1923 1926 }
1924 1927
1925 1928
1926 1929 /*
1927 1930 * Copy the section data from one vnode to the section of another vnode.
1928 1931 */
1929 1932 static void
1930 1933 elf_copy_scn(elf_core_ctx_t *ctx, const Shdr *src, vnode_t *src_vp, Shdr *dst)
1931 1934 {
1932 1935 size_t n = src->sh_size;
1933 1936 u_offset_t off = 0;
1934 1937 const u_offset_t soff = src->sh_offset;
1935 1938 const u_offset_t doff = ctx->ecc_doffset;
1936 1939 void *buf = ctx->ecc_buf;
1937 1940 vnode_t *dst_vp = ctx->ecc_vp;
1938 1941 cred_t *credp = ctx->ecc_credp;
1939 1942
1940 1943 /* Protect the copy loop below from overflow on the offsets */
1941 1944 if (n > OFF_MAX || (n + soff) > OFF_MAX || (n + doff) > OFF_MAX ||
1942 1945 (n + soff) < n || (n + doff) < n) {
1943 1946 dst->sh_size = 0;
1944 1947 dst->sh_offset = 0;
1945 1948 return;
1946 1949 }
1947 1950
1948 1951 while (n != 0) {
1949 1952 const size_t len = MIN(ctx->ecc_bufsz, n);
1950 1953 ssize_t resid;
1951 1954
1952 1955 if (vn_rdwr(UIO_READ, src_vp, buf, (ssize_t)len,
1953 1956 (offset_t)(soff + off),
1954 1957 UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid) != 0 ||
1955 1958 resid >= len || resid < 0 ||
1956 1959 core_write(dst_vp, UIO_SYSSPACE, (offset_t)(doff + off),
1957 1960 buf, len - resid, ctx->ecc_rlimit, credp) != 0) {
1958 1961 dst->sh_size = 0;
1959 1962 dst->sh_offset = 0;
1960 1963 return;
1961 1964 }
1962 1965
1963 1966 ASSERT(n >= len - resid);
1964 1967
1965 1968 n -= len - resid;
1966 1969 off += len - resid;
1967 1970 }
1968 1971
1969 1972 ctx->ecc_doffset += src->sh_size;
1970 1973 }
1971 1974
1972 1975 /*
1973 1976 * Walk sections for a given ELF object, counting (or copying) those of
1974 1977 * interest (CTF, symtab, strtab).
1975 1978 */
1976 1979 static uint_t
1977 1980 elf_process_obj_scns(elf_core_ctx_t *ctx, vnode_t *mvp, caddr_t saddr,
1978 1981 Shdr *v, uint_t idx, uint_t remain, shstrtab_t *shstrtab)
1979 1982 {
1980 1983 Ehdr ehdr;
1981 1984 const core_content_t content = ctx->ecc_content;
1982 1985 cred_t *credp = ctx->ecc_credp;
1983 1986 Shdr *ctf = NULL, *symtab = NULL, *strtab = NULL;
1984 1987 uintptr_t off = 0;
1985 1988 uint_t nshdrs, shstrndx, nphdrs, count = 0;
1986 1989 u_offset_t *doffp = &ctx->ecc_doffset;
1987 1990 boolean_t ctf_link = B_FALSE;
1988 1991 caddr_t shbase;
1989 1992 size_t shsize, shstrsize;
1990 1993 char *shstrbase;
1991 1994
1992 1995 if ((content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) == 0) {
1993 1996 return (0);
1994 1997 }
1995 1998
1996 1999 if (getelfhead(mvp, credp, &ehdr, &nshdrs, &shstrndx, &nphdrs) != 0 ||
1997 2000 getelfshdr(mvp, credp, &ehdr, nshdrs, shstrndx, &shbase, &shsize,
1998 2001 &shstrbase, &shstrsize) != 0) {
1999 2002 return (0);
2000 2003 }
2001 2004
2002 2005 /* Starting at index 1 skips SHT_NULL which is expected at index 0 */
2003 2006 off = ehdr.e_shentsize;
2004 2007 for (uint_t i = 1; i < nshdrs; i++, off += ehdr.e_shentsize) {
2005 2008 Shdr *shdr, *symchk = NULL, *strchk;
2006 2009 const char *name;
2007 2010
2008 2011 shdr = (Shdr *)(shbase + off);
2009 2012 if (shdr->sh_name >= shstrsize || shdr->sh_type == SHT_NULL)
2010 2013 continue;
2011 2014
2012 2015 name = shstrbase + shdr->sh_name;
2013 2016
2014 2017 if (ctf == NULL &&
2015 2018 (content & CC_CONTENT_CTF) != 0 &&
2016 2019 strcmp(name, shstrtab_data[STR_CTF]) == 0) {
2017 2020 ctf = shdr;
2018 2021 if (ctf->sh_link != 0 && ctf->sh_link < nshdrs) {
2019 2022 /* check linked symtab below */
2020 2023 symchk = (Shdr *)(shbase +
2021 2024 shdr->sh_link * ehdr.e_shentsize);
2022 2025 ctf_link = B_TRUE;
2023 2026 } else {
2024 2027 continue;
2025 2028 }
2026 2029 } else if (symtab == NULL &&
2027 2030 (content & CC_CONTENT_SYMTAB) != 0 &&
2028 2031 strcmp(name, shstrtab_data[STR_SYMTAB]) == 0) {
2029 2032 symchk = shdr;
2030 2033 } else {
2031 2034 continue;
2032 2035 }
2033 2036
2034 2037 ASSERT(symchk != NULL);
2035 2038 if ((symchk->sh_type != SHT_DYNSYM &&
2036 2039 symchk->sh_type != SHT_SYMTAB) ||
2037 2040 symchk->sh_link == 0 || symchk->sh_link >= nshdrs) {
2038 2041 ctf_link = B_FALSE;
2039 2042 continue;
2040 2043 }
2041 2044 strchk = (Shdr *)(shbase + symchk->sh_link * ehdr.e_shentsize);
2042 2045 if (strchk->sh_type != SHT_STRTAB) {
2043 2046 ctf_link = B_FALSE;
2044 2047 continue;
2045 2048 }
2046 2049 symtab = symchk;
2047 2050 strtab = strchk;
2048 2051
2049 2052 if (symtab != NULL && ctf != NULL) {
2050 2053 /* No other shdrs are of interest at this point */
2051 2054 break;
2052 2055 }
2053 2056 }
2054 2057
2055 2058 if (ctf != NULL)
2056 2059 count += 1;
2057 2060 if (symtab != NULL)
2058 2061 count += 2;
2059 2062 if (v == NULL || count == 0 || count > remain) {
2060 2063 count = MIN(count, remain);
2061 2064 goto done;
2062 2065 }
2063 2066
2064 2067 /* output CTF section */
2065 2068 if (ctf != NULL) {
2066 2069 elf_ctx_resize_scratch(ctx, ctf->sh_size);
2067 2070
2068 2071 v[idx].sh_name = shstrtab_ndx(shstrtab, STR_CTF);
2069 2072 v[idx].sh_addr = (Addr)(uintptr_t)saddr;
2070 2073 v[idx].sh_type = SHT_PROGBITS;
2071 2074 v[idx].sh_addralign = 4;
2072 2075 *doffp = roundup(*doffp, v[idx].sh_addralign);
2073 2076 v[idx].sh_offset = *doffp;
2074 2077 v[idx].sh_size = ctf->sh_size;
2075 2078
2076 2079 if (ctf_link) {
2077 2080 /*
2078 2081 * The linked symtab (and strtab) will be output
2079 2082 * immediately after this CTF section. Its shdr index
2080 2083 * directly follows this one.
2081 2084 */
2082 2085 v[idx].sh_link = idx + 1;
2083 2086 ASSERT(symtab != NULL);
2084 2087 } else {
2085 2088 v[idx].sh_link = 0;
2086 2089 }
2087 2090 elf_copy_scn(ctx, ctf, mvp, &v[idx]);
2088 2091 idx++;
2089 2092 }
2090 2093
2091 2094 /* output SYMTAB/STRTAB sections */
2092 2095 if (symtab != NULL) {
2093 2096 uint_t symtab_name, strtab_name;
2094 2097
2095 2098 elf_ctx_resize_scratch(ctx,
2096 2099 MAX(symtab->sh_size, strtab->sh_size));
2097 2100
2098 2101 if (symtab->sh_type == SHT_DYNSYM) {
2099 2102 symtab_name = shstrtab_ndx(shstrtab, STR_DYNSYM);
2100 2103 strtab_name = shstrtab_ndx(shstrtab, STR_DYNSTR);
2101 2104 } else {
2102 2105 symtab_name = shstrtab_ndx(shstrtab, STR_SYMTAB);
2103 2106 strtab_name = shstrtab_ndx(shstrtab, STR_STRTAB);
2104 2107 }
2105 2108
2106 2109 v[idx].sh_name = symtab_name;
2107 2110 v[idx].sh_type = symtab->sh_type;
2108 2111 v[idx].sh_addr = symtab->sh_addr;
2109 2112 if (ehdr.e_type == ET_DYN || v[idx].sh_addr == 0)
2110 2113 v[idx].sh_addr += (Addr)(uintptr_t)saddr;
2111 2114 v[idx].sh_addralign = symtab->sh_addralign;
2112 2115 *doffp = roundup(*doffp, v[idx].sh_addralign);
2113 2116 v[idx].sh_offset = *doffp;
2114 2117 v[idx].sh_size = symtab->sh_size;
2115 2118 v[idx].sh_link = idx + 1;
2116 2119 v[idx].sh_entsize = symtab->sh_entsize;
2117 2120 v[idx].sh_info = symtab->sh_info;
2118 2121
2119 2122 elf_copy_scn(ctx, symtab, mvp, &v[idx]);
2120 2123 idx++;
2121 2124
2122 2125 v[idx].sh_name = strtab_name;
2123 2126 v[idx].sh_type = SHT_STRTAB;
2124 2127 v[idx].sh_flags = SHF_STRINGS;
2125 2128 v[idx].sh_addr = strtab->sh_addr;
2126 2129 if (ehdr.e_type == ET_DYN || v[idx].sh_addr == 0)
2127 2130 v[idx].sh_addr += (Addr)(uintptr_t)saddr;
2128 2131 v[idx].sh_addralign = strtab->sh_addralign;
2129 2132 *doffp = roundup(*doffp, v[idx].sh_addralign);
2130 2133 v[idx].sh_offset = *doffp;
2131 2134 v[idx].sh_size = strtab->sh_size;
2132 2135
2133 2136 elf_copy_scn(ctx, strtab, mvp, &v[idx]);
2134 2137 idx++;
2135 2138 }
2136 2139
2137 2140 done:
2138 2141 kmem_free(shstrbase, shstrsize);
2139 2142 kmem_free(shbase, shsize);
2140 2143 return (count);
2141 2144 }
2142 2145
2143 2146 /*
2144 2147 * Walk mappings in process address space, examining those which correspond to
2145 2148 * loaded objects. It is called twice from elfcore: Once to simply count
2146 2149 * relevant sections, and again later to copy those sections once an adequate
2147 2150 * buffer has been allocated for the shdr details.
2148 2151 */
2149 2152 static int
2150 2153 elf_process_scns(elf_core_ctx_t *ctx, Shdr *v, uint_t nv, uint_t *nshdrsp)
2151 2154 {
2152 2155 vnode_t *lastvp = NULL;
2153 2156 struct seg *seg;
2154 2157 uint_t idx = 0, remain;
2155 2158 shstrtab_t shstrtab;
2156 2159 struct as *as = ctx->ecc_p->p_as;
2157 2160 int error = 0;
2158 2161
2159 2162 ASSERT(AS_WRITE_HELD(as));
2160 2163
2161 2164 if (v != NULL) {
2162 2165 ASSERT(nv != 0);
2163 2166
2164 2167 shstrtab_init(&shstrtab);
2165 2168 remain = nv;
2166 2169 } else {
2167 2170 ASSERT(nv == 0);
2168 2171
2169 2172 /*
2170 2173 * The shdrs are being counted, rather than outputting them
2171 2174 * into a buffer. Leave room for two entries: the SHT_NULL at
2172 2175 * index 0 and the shstrtab at the end.
2173 2176 */
2174 2177 remain = UINT_MAX - 2;
2175 2178 }
2176 2179
2177 2180 /* Per the ELF spec, shdr index 0 is reserved. */
2178 2181 idx = 1;
2179 2182 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2180 2183 vnode_t *mvp;
2181 2184 void *tmp = NULL;
2182 2185 caddr_t saddr = seg->s_base, naddr, eaddr;
2183 2186 size_t segsize;
2184 2187 uint_t count, prot;
2185 2188
2186 2189 /*
2187 2190 * Since we're just looking for text segments of load
2188 2191 * objects, we only care about the protection bits; we don't
2189 2192 * care about the actual size of the segment so we use the
2190 2193 * reserved size. If the segment's size is zero, there's
2191 2194 * something fishy going on so we ignore this segment.
2192 2195 */
2193 2196 if (seg->s_ops != &segvn_ops ||
2194 2197 SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
2195 2198 mvp == lastvp || mvp == NULL || mvp->v_type != VREG ||
2196 2199 (segsize = pr_getsegsize(seg, 1)) == 0)
2197 2200 continue;
2198 2201
2199 2202 eaddr = saddr + segsize;
2200 2203 prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr);
2201 2204 pr_getprot_done(&tmp);
2202 2205
2203 2206 /*
2204 2207 * Skip this segment unless the protection bits look like
2205 2208 * what we'd expect for a text segment.
2206 2209 */
2207 2210 if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)
2208 2211 continue;
2209 2212
2210 2213 count = elf_process_obj_scns(ctx, mvp, saddr, v, idx, remain,
2211 2214 &shstrtab);
2212 2215
2213 2216 ASSERT(count <= remain);
2214 2217 ASSERT(v == NULL || (idx + count) < nv);
2215 2218
2216 2219 remain -= count;
2217 2220 idx += count;
2218 2221 lastvp = mvp;
2219 2222 }
2220 2223
2221 2224 if (v == NULL) {
2222 2225 if (idx == 1) {
2223 2226 *nshdrsp = 0;
2224 2227 } else {
2225 2228 /* Include room for the shrstrtab at the end */
2226 2229 *nshdrsp = idx + 1;
2227 2230 }
2228 2231 return (0);
2229 2232 }
2230 2233
2231 2234 if (idx != nv - 1) {
2232 2235 cmn_err(CE_WARN, "elfcore: core dump failed for "
2233 2236 "process %d; address space is changing",
2234 2237 ctx->ecc_p->p_pid);
2235 2238 return (EIO);
2236 2239 }
2237 2240
2238 2241 v[idx].sh_name = shstrtab_ndx(&shstrtab, STR_SHSTRTAB);
2239 2242 v[idx].sh_size = shstrtab_size(&shstrtab);
2240 2243 v[idx].sh_addralign = 1;
2241 2244 v[idx].sh_offset = ctx->ecc_doffset;
2242 2245 v[idx].sh_flags = SHF_STRINGS;
2243 2246 v[idx].sh_type = SHT_STRTAB;
2244 2247
2245 2248 elf_ctx_resize_scratch(ctx, v[idx].sh_size);
2246 2249 VERIFY3U(ctx->ecc_bufsz, >=, v[idx].sh_size);
2247 2250 shstrtab_dump(&shstrtab, ctx->ecc_buf);
2248 2251
2249 2252 error = core_write(ctx->ecc_vp, UIO_SYSSPACE, ctx->ecc_doffset,
2250 2253 ctx->ecc_buf, v[idx].sh_size, ctx->ecc_rlimit, ctx->ecc_credp);
2251 2254 if (error == 0) {
2252 2255 ctx->ecc_doffset += v[idx].sh_size;
2253 2256 }
2254 2257
2255 2258 return (error);
2256 2259 }
|
↓ open down ↓ |
2144 lines elided |
↑ open up ↑ |
2257 2260
2258 2261 int
2259 2262 elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig,
2260 2263 core_content_t content)
2261 2264 {
2262 2265 u_offset_t poffset, soffset, doffset;
2263 2266 int error;
2264 2267 uint_t i, nphdrs, nshdrs;
2265 2268 struct seg *seg;
2266 2269 struct as *as = p->p_as;
2267 - void *bigwad;
2270 + void *bigwad, *zeropg = NULL;
2268 2271 size_t bigsize, phdrsz, shdrsz;
2269 2272 Ehdr *ehdr;
2270 2273 Phdr *phdr;
2271 2274 Shdr shdr0;
2272 2275 caddr_t brkbase, stkbase;
2273 2276 size_t brksize, stksize;
2274 2277 boolean_t overflowed = B_FALSE, retried = B_FALSE;
2275 2278 klwp_t *lwp = ttolwp(curthread);
2276 2279 elf_core_ctx_t ctx = {
2277 2280 .ecc_vp = vp,
2278 2281 .ecc_p = p,
2279 2282 .ecc_credp = credp,
2280 2283 .ecc_rlimit = rlimit,
2281 2284 .ecc_content = content,
2282 2285 .ecc_doffset = 0,
2283 2286 .ecc_buf = NULL,
2284 2287 .ecc_bufsz = 0
2285 2288 };
2286 2289
2287 2290 top:
2288 2291 /*
2289 2292 * Make sure we have everything we need (registers, etc.).
2290 2293 * All other lwps have already stopped and are in an orderly state.
2291 2294 */
2292 2295 ASSERT(p == ttoproc(curthread));
2293 2296 prstop(0, 0);
2294 2297
2295 2298 AS_LOCK_ENTER(as, RW_WRITER);
2296 2299 nphdrs = prnsegs(as, 0) + 2; /* two CORE note sections */
2297 2300
2298 2301 /*
2299 2302 * Count the number of section headers we're going to need.
2300 2303 */
2301 2304 nshdrs = 0;
2302 2305 if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) {
2303 2306 VERIFY0(elf_process_scns(&ctx, NULL, 0, &nshdrs));
2304 2307 }
2305 2308 AS_LOCK_EXIT(as);
2306 2309
2307 2310 /*
2308 2311 * The core file contents may require zero section headers, but if
2309 2312 * we overflow the 16 bits allotted to the program header count in
2310 2313 * the ELF header, we'll need that program header at index zero.
2311 2314 */
2312 2315 if (nshdrs == 0 && nphdrs >= PN_XNUM) {
2313 2316 nshdrs = 1;
2314 2317 }
2315 2318
2316 2319 /*
2317 2320 * Allocate a buffer which is sized adequately to hold the ehdr, phdrs
2318 2321 * or shdrs needed to produce the core file. It is used for the three
2319 2322 * tasks sequentially, not simultaneously, so it does not need space
2320 2323 * for all three data at once, only the largest one.
2321 2324 */
2322 2325 VERIFY(nphdrs >= 2);
2323 2326 phdrsz = nphdrs * sizeof (Phdr);
2324 2327 shdrsz = nshdrs * sizeof (Shdr);
2325 2328 bigsize = MAX(sizeof (Ehdr), MAX(phdrsz, shdrsz));
2326 2329 bigwad = kmem_alloc(bigsize, KM_SLEEP);
2327 2330
2328 2331 ehdr = (Ehdr *)bigwad;
2329 2332 bzero(ehdr, sizeof (*ehdr));
2330 2333
2331 2334 ehdr->e_ident[EI_MAG0] = ELFMAG0;
2332 2335 ehdr->e_ident[EI_MAG1] = ELFMAG1;
2333 2336 ehdr->e_ident[EI_MAG2] = ELFMAG2;
2334 2337 ehdr->e_ident[EI_MAG3] = ELFMAG3;
2335 2338 ehdr->e_ident[EI_CLASS] = ELFCLASS;
2336 2339 ehdr->e_type = ET_CORE;
2337 2340
2338 2341 #if !defined(_LP64) || defined(_ELF32_COMPAT)
2339 2342
2340 2343 #if defined(__sparc)
2341 2344 ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
2342 2345 ehdr->e_machine = EM_SPARC;
2343 2346 #elif defined(__i386) || defined(__i386_COMPAT)
2344 2347 ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
2345 2348 ehdr->e_machine = EM_386;
2346 2349 #else
2347 2350 #error "no recognized machine type is defined"
2348 2351 #endif
2349 2352
2350 2353 #else /* !defined(_LP64) || defined(_ELF32_COMPAT) */
2351 2354
2352 2355 #if defined(__sparc)
2353 2356 ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
2354 2357 ehdr->e_machine = EM_SPARCV9;
2355 2358 #elif defined(__amd64)
2356 2359 ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
2357 2360 ehdr->e_machine = EM_AMD64;
2358 2361 #else
2359 2362 #error "no recognized 64-bit machine type is defined"
2360 2363 #endif
2361 2364
2362 2365 #endif /* !defined(_LP64) || defined(_ELF32_COMPAT) */
2363 2366
2364 2367 poffset = sizeof (Ehdr);
2365 2368 soffset = sizeof (Ehdr) + phdrsz;
2366 2369 doffset = sizeof (Ehdr) + phdrsz + shdrsz;
2367 2370 bzero(&shdr0, sizeof (shdr0));
2368 2371
2369 2372 /*
2370 2373 * If the count of program headers or section headers or the index
2371 2374 * of the section string table can't fit in the mere 16 bits
2372 2375 * shortsightedly allotted to them in the ELF header, we use the
2373 2376 * extended formats and put the real values in the section header
2374 2377 * as index 0.
2375 2378 */
2376 2379 if (nphdrs >= PN_XNUM) {
2377 2380 ehdr->e_phnum = PN_XNUM;
2378 2381 shdr0.sh_info = nphdrs;
2379 2382 } else {
2380 2383 ehdr->e_phnum = (unsigned short)nphdrs;
2381 2384 }
2382 2385
2383 2386 if (nshdrs > 0) {
2384 2387 if (nshdrs >= SHN_LORESERVE) {
2385 2388 ehdr->e_shnum = 0;
2386 2389 shdr0.sh_size = nshdrs;
2387 2390 } else {
2388 2391 ehdr->e_shnum = (unsigned short)nshdrs;
2389 2392 }
2390 2393
2391 2394 if (nshdrs - 1 >= SHN_LORESERVE) {
2392 2395 ehdr->e_shstrndx = SHN_XINDEX;
2393 2396 shdr0.sh_link = nshdrs - 1;
2394 2397 } else {
2395 2398 ehdr->e_shstrndx = (unsigned short)(nshdrs - 1);
2396 2399 }
2397 2400
2398 2401 ehdr->e_shoff = soffset;
2399 2402 ehdr->e_shentsize = sizeof (Shdr);
2400 2403 }
2401 2404
2402 2405 ehdr->e_version = EV_CURRENT;
2403 2406 ehdr->e_ehsize = sizeof (Ehdr);
2404 2407 ehdr->e_phoff = poffset;
2405 2408 ehdr->e_phentsize = sizeof (Phdr);
2406 2409
2407 2410 if (error = core_write(vp, UIO_SYSSPACE, (offset_t)0, ehdr,
2408 2411 sizeof (Ehdr), rlimit, credp)) {
2409 2412 goto done;
2410 2413 }
2411 2414
2412 2415 phdr = (Phdr *)bigwad;
2413 2416 bzero(phdr, phdrsz);
2414 2417
2415 2418 setup_old_note_header(&phdr[0], p);
2416 2419 phdr[0].p_offset = doffset = roundup(doffset, sizeof (Word));
2417 2420 doffset += phdr[0].p_filesz;
2418 2421
2419 2422 setup_note_header(&phdr[1], p);
2420 2423 phdr[1].p_offset = doffset = roundup(doffset, sizeof (Word));
2421 2424 doffset += phdr[1].p_filesz;
2422 2425
2423 2426 mutex_enter(&p->p_lock);
2424 2427
2425 2428 brkbase = p->p_brkbase;
2426 2429 brksize = p->p_brksize;
2427 2430
2428 2431 stkbase = p->p_usrstack - p->p_stksize;
2429 2432 stksize = p->p_stksize;
2430 2433
2431 2434 mutex_exit(&p->p_lock);
2432 2435
2433 2436 AS_LOCK_ENTER(as, RW_WRITER);
2434 2437 i = 2;
2435 2438 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2436 2439 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2437 2440 caddr_t saddr, naddr;
2438 2441 void *tmp = NULL;
2439 2442 extern struct seg_ops segspt_shmops;
2440 2443
2441 2444 if ((seg->s_flags & S_HOLE) != 0) {
2442 2445 continue;
2443 2446 }
2444 2447
2445 2448 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2446 2449 uint_t prot;
2447 2450 size_t size;
2448 2451 int type;
2449 2452 vnode_t *mvp;
2450 2453
2451 2454 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2452 2455 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
2453 2456 if ((size = (size_t)(naddr - saddr)) == 0) {
2454 2457 ASSERT(tmp == NULL);
2455 2458 continue;
2456 2459 } else if (i == nphdrs) {
2457 2460 pr_getprot_done(&tmp);
2458 2461 overflowed = B_TRUE;
2459 2462 break;
2460 2463 }
2461 2464 phdr[i].p_type = PT_LOAD;
2462 2465 phdr[i].p_vaddr = (Addr)(uintptr_t)saddr;
2463 2466 phdr[i].p_memsz = size;
2464 2467 if (prot & PROT_READ)
2465 2468 phdr[i].p_flags |= PF_R;
2466 2469 if (prot & PROT_WRITE)
2467 2470 phdr[i].p_flags |= PF_W;
2468 2471 if (prot & PROT_EXEC)
2469 2472 phdr[i].p_flags |= PF_X;
2470 2473
2471 2474 /*
2472 2475 * Figure out which mappings to include in the core.
2473 2476 */
2474 2477 type = SEGOP_GETTYPE(seg, saddr);
2475 2478
2476 2479 if (saddr == stkbase && size == stksize) {
2477 2480 if (!(content & CC_CONTENT_STACK))
2478 2481 goto exclude;
2479 2482
2480 2483 } else if (saddr == brkbase && size == brksize) {
2481 2484 if (!(content & CC_CONTENT_HEAP))
2482 2485 goto exclude;
2483 2486
2484 2487 } else if (seg->s_ops == &segspt_shmops) {
2485 2488 if (type & MAP_NORESERVE) {
2486 2489 if (!(content & CC_CONTENT_DISM))
2487 2490 goto exclude;
2488 2491 } else {
2489 2492 if (!(content & CC_CONTENT_ISM))
2490 2493 goto exclude;
2491 2494 }
2492 2495
2493 2496 } else if (seg->s_ops != &segvn_ops) {
2494 2497 goto exclude;
2495 2498
2496 2499 } else if (type & MAP_SHARED) {
2497 2500 if (shmgetid(p, saddr) != SHMID_NONE) {
2498 2501 if (!(content & CC_CONTENT_SHM))
2499 2502 goto exclude;
2500 2503
2501 2504 } else if (SEGOP_GETVP(seg, seg->s_base,
2502 2505 &mvp) != 0 || mvp == NULL ||
2503 2506 mvp->v_type != VREG) {
2504 2507 if (!(content & CC_CONTENT_SHANON))
2505 2508 goto exclude;
2506 2509
2507 2510 } else {
2508 2511 if (!(content & CC_CONTENT_SHFILE))
2509 2512 goto exclude;
2510 2513 }
2511 2514
2512 2515 } else if (SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
2513 2516 mvp == NULL || mvp->v_type != VREG) {
2514 2517 if (!(content & CC_CONTENT_ANON))
2515 2518 goto exclude;
2516 2519
2517 2520 } else if (prot == (PROT_READ | PROT_EXEC)) {
2518 2521 if (!(content & CC_CONTENT_TEXT))
2519 2522 goto exclude;
2520 2523
2521 2524 } else if (prot == PROT_READ) {
2522 2525 if (!(content & CC_CONTENT_RODATA))
2523 2526 goto exclude;
2524 2527
2525 2528 } else {
2526 2529 if (!(content & CC_CONTENT_DATA))
2527 2530 goto exclude;
2528 2531 }
2529 2532
2530 2533 doffset = roundup(doffset, sizeof (Word));
2531 2534 phdr[i].p_offset = doffset;
2532 2535 phdr[i].p_filesz = size;
2533 2536 doffset += size;
2534 2537 exclude:
2535 2538 i++;
2536 2539 }
2537 2540 VERIFY(tmp == NULL);
2538 2541 if (overflowed)
2539 2542 break;
2540 2543 }
2541 2544 AS_LOCK_EXIT(as);
2542 2545
2543 2546 if (overflowed || i != nphdrs) {
2544 2547 if (!retried) {
2545 2548 retried = B_TRUE;
2546 2549 overflowed = B_FALSE;
2547 2550 kmem_free(bigwad, bigsize);
2548 2551 goto top;
2549 2552 }
2550 2553 cmn_err(CE_WARN, "elfcore: core dump failed for "
2551 2554 "process %d; address space is changing", p->p_pid);
2552 2555 error = EIO;
2553 2556 goto done;
2554 2557 }
2555 2558
2556 2559 if ((error = core_write(vp, UIO_SYSSPACE, poffset,
2557 2560 phdr, phdrsz, rlimit, credp)) != 0) {
2558 2561 goto done;
2559 2562 }
2560 2563
2561 2564 if ((error = write_old_elfnotes(p, sig, vp, phdr[0].p_offset, rlimit,
2562 2565 credp)) != 0) {
2563 2566 goto done;
2564 2567 }
2565 2568 if ((error = write_elfnotes(p, sig, vp, phdr[1].p_offset, rlimit,
2566 2569 credp, content)) != 0) {
2567 2570 goto done;
2568 2571 }
|
↓ open down ↓ |
291 lines elided |
↑ open up ↑ |
2569 2572
2570 2573 for (i = 2; i < nphdrs; i++) {
2571 2574 prkillinfo_t killinfo;
2572 2575 sigqueue_t *sq;
2573 2576 int sig, j;
2574 2577
2575 2578 if (phdr[i].p_filesz == 0)
2576 2579 continue;
2577 2580
2578 2581 /*
2582 + * If we hit a region that was mapped PROT_NONE then we cannot
2583 + * continue dumping this normally as the kernel would be unable
2584 + * to read from the page and that would result in us failing to
2585 + * dump the page. As such, any region mapped PROT_NONE, we dump
2586 + * as a zero-filled page such that this is still represented in
2587 + * the map.
2588 + *
2579 2589 * If dumping out this segment fails, rather than failing
2580 2590 * the core dump entirely, we reset the size of the mapping
2581 2591 * to zero to indicate that the data is absent from the core
2582 2592 * file and or in the PF_SUNW_FAILURE flag to differentiate
2583 2593 * this from mappings that were excluded due to the core file
2584 2594 * content settings.
2585 2595 */
2586 - if ((error = core_seg(p, vp, phdr[i].p_offset,
2587 - (caddr_t)(uintptr_t)phdr[i].p_vaddr, phdr[i].p_filesz,
2588 - rlimit, credp)) == 0) {
2589 - continue;
2596 + if ((phdr[i].p_flags & (PF_R | PF_W | PF_X)) == 0) {
2597 + size_t towrite = phdr[i].p_filesz;
2598 + size_t curoff = 0;
2599 +
2600 + if (zeropg == NULL) {
2601 + zeropg = kmem_zalloc(elf_zeropg_sz, KM_SLEEP);
2602 + }
2603 +
2604 + error = 0;
2605 + while (towrite != 0) {
2606 + size_t len = MIN(towrite, elf_zeropg_sz);
2607 +
2608 + error = core_write(vp, UIO_SYSSPACE,
2609 + phdr[i].p_offset + curoff, zeropg, len,
2610 + rlimit, credp);
2611 + if (error != 0)
2612 + break;
2613 +
2614 + towrite -= len;
2615 + curoff += len;
2616 + }
2617 + } else {
2618 + error = core_seg(p, vp, phdr[i].p_offset,
2619 + (caddr_t)(uintptr_t)phdr[i].p_vaddr,
2620 + phdr[i].p_filesz, rlimit, credp);
2590 2621 }
2622 + if (error == 0)
2623 + continue;
2591 2624
2592 2625 if ((sig = lwp->lwp_cursig) == 0) {
2593 2626 /*
2594 2627 * We failed due to something other than a signal.
2595 2628 * Since the space reserved for the segment is now
2596 2629 * unused, we stash the errno in the first four
2597 2630 * bytes. This undocumented interface will let us
2598 2631 * understand the nature of the failure.
2599 2632 */
2600 2633 (void) core_write(vp, UIO_SYSSPACE, phdr[i].p_offset,
2601 2634 &error, sizeof (error), rlimit, credp);
2602 2635
2603 2636 phdr[i].p_filesz = 0;
2604 2637 phdr[i].p_flags |= PF_SUNW_FAILURE;
2605 2638 if ((error = core_write(vp, UIO_SYSSPACE,
2606 2639 poffset + sizeof (Phdr) * i, &phdr[i],
2607 2640 sizeof (Phdr), rlimit, credp)) != 0)
2608 2641 goto done;
2609 2642
2610 2643 continue;
2611 2644 }
2612 2645
2613 2646 /*
2614 2647 * We took a signal. We want to abort the dump entirely, but
2615 2648 * we also want to indicate what failed and why. We therefore
2616 2649 * use the space reserved for the first failing segment to
2617 2650 * write our error (which, for purposes of compatability with
2618 2651 * older core dump readers, we set to EINTR) followed by any
2619 2652 * siginfo associated with the signal.
2620 2653 */
2621 2654 bzero(&killinfo, sizeof (killinfo));
2622 2655 killinfo.prk_error = EINTR;
2623 2656
2624 2657 sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo;
2625 2658
2626 2659 if (sq != NULL) {
2627 2660 bcopy(&sq->sq_info, &killinfo.prk_info,
2628 2661 sizeof (sq->sq_info));
2629 2662 } else {
2630 2663 killinfo.prk_info.si_signo = lwp->lwp_cursig;
2631 2664 killinfo.prk_info.si_code = SI_NOINFO;
2632 2665 }
2633 2666
2634 2667 #if (defined(_SYSCALL32_IMPL) || defined(_LP64))
2635 2668 /*
2636 2669 * If this is a 32-bit process, we need to translate from the
2637 2670 * native siginfo to the 32-bit variant. (Core readers must
2638 2671 * always have the same data model as their target or must
2639 2672 * be aware of -- and compensate for -- data model differences.)
2640 2673 */
2641 2674 if (curproc->p_model == DATAMODEL_ILP32) {
2642 2675 siginfo32_t si32;
2643 2676
2644 2677 siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32);
2645 2678 bcopy(&si32, &killinfo.prk_info, sizeof (si32));
2646 2679 }
2647 2680 #endif
2648 2681
2649 2682 (void) core_write(vp, UIO_SYSSPACE, phdr[i].p_offset,
2650 2683 &killinfo, sizeof (killinfo), rlimit, credp);
2651 2684
2652 2685 /*
2653 2686 * For the segment on which we took the signal, indicate that
2654 2687 * its data now refers to a siginfo.
2655 2688 */
2656 2689 phdr[i].p_filesz = 0;
2657 2690 phdr[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED |
2658 2691 PF_SUNW_SIGINFO;
2659 2692
2660 2693 /*
2661 2694 * And for every other segment, indicate that its absence
2662 2695 * is due to a signal.
2663 2696 */
2664 2697 for (j = i + 1; j < nphdrs; j++) {
2665 2698 phdr[j].p_filesz = 0;
2666 2699 phdr[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED;
2667 2700 }
2668 2701
2669 2702 /*
2670 2703 * Finally, write out our modified program headers.
2671 2704 */
2672 2705 if ((error = core_write(vp, UIO_SYSSPACE,
2673 2706 poffset + sizeof (Phdr) * i, &phdr[i],
2674 2707 sizeof (Phdr) * (nphdrs - i), rlimit, credp)) != 0) {
2675 2708 goto done;
2676 2709 }
2677 2710
2678 2711 break;
2679 2712 }
2680 2713
2681 2714 if (nshdrs > 0) {
2682 2715 Shdr *shdr = (Shdr *)bigwad;
2683 2716
2684 2717 bzero(shdr, shdrsz);
2685 2718 if (nshdrs > 1) {
2686 2719 ctx.ecc_doffset = doffset;
2687 2720 AS_LOCK_ENTER(as, RW_WRITER);
2688 2721 error = elf_process_scns(&ctx, shdr, nshdrs, NULL);
2689 2722 AS_LOCK_EXIT(as);
2690 2723 if (error != 0) {
2691 2724 goto done;
2692 2725 }
2693 2726 }
2694 2727 /* Copy any extended format data destined for the first shdr */
2695 2728 bcopy(&shdr0, shdr, sizeof (shdr0));
2696 2729
2697 2730 error = core_write(vp, UIO_SYSSPACE, soffset, shdr, shdrsz,
2698 2731 rlimit, credp);
2699 2732 }
2700 2733
2701 2734 done:
2702 2735 if (ctx.ecc_bufsz != 0) {
2703 2736 kmem_free(ctx.ecc_buf, ctx.ecc_bufsz);
2704 2737 }
2705 2738 kmem_free(bigwad, bigsize);
2706 2739 return (error);
2707 2740 }
2708 2741
2709 2742 #ifndef _ELF32_COMPAT
2710 2743
2711 2744 static struct execsw esw = {
2712 2745 #ifdef _LP64
2713 2746 elf64magicstr,
2714 2747 #else /* _LP64 */
2715 2748 elf32magicstr,
2716 2749 #endif /* _LP64 */
2717 2750 0,
2718 2751 5,
2719 2752 elfexec,
2720 2753 elfcore
2721 2754 };
2722 2755
2723 2756 static struct modlexec modlexec = {
2724 2757 &mod_execops, "exec module for elf", &esw
2725 2758 };
2726 2759
2727 2760 #ifdef _LP64
2728 2761 extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
2729 2762 intpdata_t *idatap, int level, size_t *execsz,
2730 2763 int setid, caddr_t exec_file, cred_t *cred,
2731 2764 int *brand_action);
2732 2765 extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
2733 2766 rlim64_t rlimit, int sig, core_content_t content);
2734 2767
2735 2768 static struct execsw esw32 = {
2736 2769 elf32magicstr,
2737 2770 0,
2738 2771 5,
2739 2772 elf32exec,
2740 2773 elf32core
2741 2774 };
2742 2775
2743 2776 static struct modlexec modlexec32 = {
2744 2777 &mod_execops, "32-bit exec module for elf", &esw32
2745 2778 };
2746 2779 #endif /* _LP64 */
2747 2780
2748 2781 static struct modlinkage modlinkage = {
2749 2782 MODREV_1,
2750 2783 (void *)&modlexec,
2751 2784 #ifdef _LP64
2752 2785 (void *)&modlexec32,
2753 2786 #endif /* _LP64 */
2754 2787 NULL
2755 2788 };
2756 2789
2757 2790 int
2758 2791 _init(void)
2759 2792 {
2760 2793 return (mod_install(&modlinkage));
2761 2794 }
2762 2795
2763 2796 int
2764 2797 _fini(void)
2765 2798 {
2766 2799 return (mod_remove(&modlinkage));
2767 2800 }
2768 2801
2769 2802 int
2770 2803 _info(struct modinfo *modinfop)
2771 2804 {
2772 2805 return (mod_info(&modlinkage, modinfop));
2773 2806 }
2774 2807
2775 2808 #endif /* !_ELF32_COMPAT */
|
↓ open down ↓ |
175 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX