Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/os/brand.c
+++ new/usr/src/uts/common/os/brand.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
24 24 */
25 25
26 26 #include <sys/kmem.h>
27 27 #include <sys/errno.h>
28 28 #include <sys/systm.h>
29 29 #include <sys/cmn_err.h>
30 30 #include <sys/brand.h>
31 31 #include <sys/machbrand.h>
32 32 #include <sys/modctl.h>
33 33 #include <sys/rwlock.h>
34 34 #include <sys/zone.h>
35 35 #include <sys/pathname.h>
36 36
37 37 #define SUPPORTED_BRAND_VERSION BRAND_VER_1
38 38
39 39 #if defined(__sparcv9)
40 40 /* sparcv9 uses system wide brand interposition hooks */
41 41 static void brand_plat_interposition_enable(void);
42 42 static void brand_plat_interposition_disable(void);
43 43
44 44 struct brand_mach_ops native_mach_ops = {
45 45 NULL, NULL
46 46 };
47 47 #else /* !__sparcv9 */
48 48 struct brand_mach_ops native_mach_ops = {
49 49 NULL, NULL, NULL, NULL, NULL, NULL, NULL
50 50 };
51 51 #endif /* !__sparcv9 */
52 52
53 53 brand_t native_brand = {
54 54 BRAND_VER_1,
55 55 "native",
56 56 NULL,
57 57 &native_mach_ops,
58 58 0
59 59 };
60 60
61 61 /*
62 62 * Used to maintain a list of all the brands currently loaded into the
63 63 * kernel.
64 64 */
65 65 struct brand_list {
66 66 int bl_refcnt;
67 67 struct brand_list *bl_next;
68 68 brand_t *bl_brand;
69 69 };
70 70
71 71 static struct brand_list *brand_list = NULL;
72 72
73 73 /*
74 74 * This lock protects the integrity of the brand list.
75 75 */
76 76 static kmutex_t brand_list_lock;
77 77
78 78 void
79 79 brand_init()
80 80 {
81 81 mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
82 82 p0.p_brand = &native_brand;
83 83 }
84 84
85 85 int
86 86 brand_register(brand_t *brand)
87 87 {
88 88 struct brand_list *list, *scan;
89 89
90 90 if (brand == NULL)
91 91 return (EINVAL);
92 92
93 93 if (brand->b_version != SUPPORTED_BRAND_VERSION) {
94 94 if (brand->b_version < SUPPORTED_BRAND_VERSION) {
95 95 cmn_err(CE_WARN,
96 96 "brand '%s' was built to run on older versions "
97 97 "of Solaris.",
98 98 brand->b_name);
99 99 } else {
100 100 cmn_err(CE_WARN,
101 101 "brand '%s' was built to run on a newer version "
102 102 "of Solaris.",
103 103 brand->b_name);
104 104 }
105 105 return (EINVAL);
106 106 }
107 107
108 108 /* Sanity checks */
109 109 if (brand->b_name == NULL || brand->b_ops == NULL ||
110 110 brand->b_ops->b_brandsys == NULL) {
111 111 cmn_err(CE_WARN, "Malformed brand");
112 112 return (EINVAL);
113 113 }
114 114
115 115 list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
116 116
117 117 /* Add the brand to the list of loaded brands. */
118 118 mutex_enter(&brand_list_lock);
119 119
120 120 /*
121 121 * Check to be sure we haven't already registered this brand.
122 122 */
123 123 for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
124 124 if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
125 125 cmn_err(CE_WARN,
126 126 "Invalid attempt to load a second instance of "
127 127 "brand %s", brand->b_name);
128 128 mutex_exit(&brand_list_lock);
129 129 kmem_free(list, sizeof (struct brand_list));
130 130 return (EINVAL);
131 131 }
132 132 }
133 133
134 134 #if defined(__sparcv9)
135 135 /* sparcv9 uses system wide brand interposition hooks */
136 136 if (brand_list == NULL)
137 137 brand_plat_interposition_enable();
138 138 #endif /* __sparcv9 */
139 139
140 140 list->bl_brand = brand;
141 141 list->bl_refcnt = 0;
142 142 list->bl_next = brand_list;
143 143 brand_list = list;
144 144
145 145 mutex_exit(&brand_list_lock);
146 146
147 147 return (0);
148 148 }
149 149
150 150 /*
151 151 * The kernel module implementing this brand is being unloaded, so remove
152 152 * it from the list of active brands.
153 153 */
154 154 int
155 155 brand_unregister(brand_t *brand)
156 156 {
157 157 struct brand_list *list, *prev;
158 158
159 159 /* Sanity checks */
160 160 if (brand == NULL || brand->b_name == NULL) {
161 161 cmn_err(CE_WARN, "Malformed brand");
162 162 return (EINVAL);
163 163 }
164 164
165 165 prev = NULL;
166 166 mutex_enter(&brand_list_lock);
167 167
168 168 for (list = brand_list; list != NULL; list = list->bl_next) {
169 169 if (list->bl_brand == brand)
170 170 break;
171 171 prev = list;
172 172 }
173 173
174 174 if (list == NULL) {
175 175 cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
176 176 mutex_exit(&brand_list_lock);
177 177 return (EINVAL);
178 178 }
179 179
180 180 if (list->bl_refcnt > 0) {
181 181 cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
182 182 brand->b_name);
183 183 mutex_exit(&brand_list_lock);
184 184 return (EBUSY);
185 185 }
186 186
187 187 /* Remove brand from the list */
188 188 if (prev != NULL)
189 189 prev->bl_next = list->bl_next;
190 190 else
191 191 brand_list = list->bl_next;
192 192
193 193 #if defined(__sparcv9)
194 194 /* sparcv9 uses system wide brand interposition hooks */
195 195 if (brand_list == NULL)
196 196 brand_plat_interposition_disable();
197 197 #endif /* __sparcv9 */
198 198
199 199 mutex_exit(&brand_list_lock);
200 200
201 201 kmem_free(list, sizeof (struct brand_list));
202 202
203 203 return (0);
204 204 }
205 205
206 206 /*
207 207 * Record that a zone of this brand has been instantiated. If the kernel
208 208 * module implementing this brand's functionality is not present, this
209 209 * routine attempts to load the module as a side effect.
210 210 */
211 211 brand_t *
212 212 brand_register_zone(struct brand_attr *attr)
213 213 {
214 214 struct brand_list *l = NULL;
215 215 ddi_modhandle_t hdl = NULL;
216 216 char *modname;
217 217 int err = 0;
218 218
219 219 if (is_system_labeled()) {
220 220 cmn_err(CE_WARN,
221 221 "Branded zones are not allowed on labeled systems.");
222 222 return (NULL);
223 223 }
224 224
225 225 /*
226 226 * We make at most two passes through this loop. The first time
227 227 * through, we're looking to see if this is a new user of an
228 228 * already loaded brand. If the brand hasn't been loaded, we
229 229 * call ddi_modopen() to force it to be loaded and then make a
230 230 * second pass through the list of brands. If we don't find the
231 231 * brand the second time through it means that the modname
232 232 * specified in the brand_attr structure doesn't provide the brand
233 233 * specified in the brandname field. This would suggest a bug in
234 234 * the brand's config.xml file. We close the module and return
235 235 * 'NULL' to the caller.
236 236 */
237 237 for (;;) {
238 238 /*
239 239 * Search list of loaded brands
240 240 */
241 241 mutex_enter(&brand_list_lock);
242 242 for (l = brand_list; l != NULL; l = l->bl_next)
243 243 if (strcmp(attr->ba_brandname,
244 244 l->bl_brand->b_name) == 0)
245 245 break;
246 246 if ((l != NULL) || (hdl != NULL))
247 247 break;
248 248 mutex_exit(&brand_list_lock);
249 249
250 250 /*
251 251 * We didn't find that the requested brand has been loaded
252 252 * yet, so we trigger the load of the appropriate kernel
253 253 * module and search the list again.
254 254 */
255 255 modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
256 256 (void) strcpy(modname, "brand/");
257 257 (void) strcat(modname, attr->ba_modname);
258 258 hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
259 259 kmem_free(modname, MAXPATHLEN);
260 260
261 261 if (err != 0)
262 262 return (NULL);
263 263 }
264 264
265 265 /*
266 266 * If we found the matching brand, bump its reference count.
267 267 */
268 268 if (l != NULL)
269 269 l->bl_refcnt++;
270 270
271 271 mutex_exit(&brand_list_lock);
272 272
273 273 if (hdl != NULL)
274 274 (void) ddi_modclose(hdl);
275 275
276 276 return ((l != NULL) ? l->bl_brand : NULL);
277 277 }
278 278
279 279 /*
280 280 * Return the number of zones currently using this brand.
281 281 */
282 282 int
283 283 brand_zone_count(struct brand *bp)
284 284 {
285 285 struct brand_list *l;
286 286 int cnt = 0;
287 287
288 288 mutex_enter(&brand_list_lock);
289 289 for (l = brand_list; l != NULL; l = l->bl_next)
290 290 if (l->bl_brand == bp) {
291 291 cnt = l->bl_refcnt;
292 292 break;
293 293 }
294 294 mutex_exit(&brand_list_lock);
295 295
296 296 return (cnt);
297 297 }
298 298
299 299 void
300 300 brand_unregister_zone(struct brand *bp)
301 301 {
302 302 struct brand_list *list;
303 303
304 304 mutex_enter(&brand_list_lock);
305 305 for (list = brand_list; list != NULL; list = list->bl_next) {
306 306 if (list->bl_brand == bp) {
307 307 ASSERT(list->bl_refcnt > 0);
308 308 list->bl_refcnt--;
309 309 break;
310 310 }
311 311 }
312 312 mutex_exit(&brand_list_lock);
313 313 }
314 314
315 315 int
316 316 brand_setbrand(proc_t *p, boolean_t lwps_ok)
317 317 {
318 318 brand_t *bp = p->p_zone->zone_brand;
319 319 void *brand_data = NULL;
320 320
321 321 VERIFY(MUTEX_NOT_HELD(&p->p_lock));
322 322 VERIFY(bp != NULL);
323 323
324 324 /*
325 325 * Process branding occurs during fork() and exec(). When it happens
326 326 * during fork(), the LWP count will always be 0 since branding is
327 327 * performed as part of getproc(), before LWPs have been associated.
328 328 * The same is not true during exec(), where a multi-LWP process may
329 329 * undergo branding just prior to gexec(). This is to ensure
330 330 * exec-related brand hooks are available. While it may seem
331 331 * complicated to brand a multi-LWP process, the two possible outcomes
332 332 * simplify things:
333 333 *
334 334 * 1. The exec() succeeds: LWPs besides the caller will be killed and
335 335 * any further branding will occur in a single-LWP context.
336 336 * 2. The exec() fails: The process will be promptly unbranded since
337 337 * the hooks are no longer needed.
338 338 *
339 339 * To prevent inconsistent brand state from being encountered during
340 340 * the exec(), LWPs beyond the caller which are associated with this
341 341 * process must be held temporarily. They will be released either when
342 342 * they are killed in the exec() success, or when the brand is cleared
343 343 * after exec() failure.
344 344 */
345 345 if (lwps_ok) {
346 346 /*
347 347 * We've been called from a exec() context tolerating the
348 348 * existence of multiple LWPs during branding is necessary.
349 349 */
350 350 VERIFY(p == curproc);
351 351 VERIFY(p->p_tlist != NULL);
352 352
353 353 if (p->p_tlist != p->p_tlist->t_forw) {
354 354 /*
355 355 * Multiple LWPs are present. Hold all but the caller.
356 356 */
357 357 if (!holdlwps(SHOLDFORK1)) {
358 358 return (-1);
359 359 }
360 360 }
361 361 } else {
362 362 /*
363 363 * Processes branded during fork() should not have LWPs at all.
364 364 */
365 365 VERIFY(p->p_tlist == NULL);
366 366 }
367 367
368 368 if (bp->b_data_size > 0) {
369 369 brand_data = kmem_zalloc(bp->b_data_size, KM_SLEEP);
370 370 }
371 371
372 372 mutex_enter(&p->p_lock);
373 373 ASSERT(!PROC_IS_BRANDED(p));
374 374 p->p_brand = bp;
375 375 p->p_brand_data = brand_data;
376 376 ASSERT(PROC_IS_BRANDED(p));
377 377 BROP(p)->b_setbrand(p);
378 378 mutex_exit(&p->p_lock);
379 379 return (0);
380 380 }
381 381
382 382 void
383 383 brand_clearbrand(proc_t *p, boolean_t lwps_ok)
384 384 {
385 385 brand_t *bp = p->p_zone->zone_brand;
386 386 void *brand_data;
387 387
388 388 VERIFY(MUTEX_NOT_HELD(&p->p_lock));
389 389 VERIFY(bp != NULL);
390 390 VERIFY(PROC_IS_BRANDED(p));
391 391
392 392 mutex_enter(&p->p_lock);
393 393 p->p_brand = &native_brand;
394 394 brand_data = p->p_brand_data;
395 395 p->p_brand_data = NULL;
396 396
397 397 if (lwps_ok) {
398 398 VERIFY(p == curproc);
399 399 /*
400 400 * A process with multiple LWPs is being de-branded after
401 401 * failing an exec. The other LWPs were held as part of the
402 402 * procedure, so they must be resumed now.
403 403 */
404 404 if (p->p_tlist != NULL && p->p_tlist != p->p_tlist->t_forw) {
405 405 continuelwps(p);
406 406 }
407 407 } else {
408 408 /*
409 409 * While clearing the brand, it's ok for one LWP to be present.
410 410 * This happens when a native binary is executed inside a
411 411 * branded zone, since the brand will be removed during the
412 412 * course of a successful exec.
413 413 */
414 414 VERIFY(p->p_tlist == NULL || p->p_tlist == p->p_tlist->t_forw);
415 415 }
416 416 mutex_exit(&p->p_lock);
417 417
418 418 if (brand_data != NULL) {
419 419 kmem_free(brand_data, bp->b_data_size);
420 420 }
421 421 }
422 422
423 423 #if defined(__sparcv9)
424 424 /*
425 425 * Currently, only sparc has system level brand syscall interposition.
426 426 * On x86 we're able to enable syscall interposition on a per-cpu basis
427 427 * when a branded thread is scheduled to run on a cpu.
428 428 */
429 429
430 430 /* Local variables needed for dynamic syscall interposition support */
431 431 static uint32_t syscall_trap_patch_instr_orig;
432 432 static uint32_t syscall_trap32_patch_instr_orig;
433 433
434 434 /* Trap Table syscall entry hot patch points */
435 435 extern void syscall_trap_patch_point(void);
436 436 extern void syscall_trap32_patch_point(void);
437 437
438 438 /* Alternate syscall entry handlers used when branded zones are running */
439 439 extern void syscall_wrapper(void);
440 440 extern void syscall_wrapper32(void);
441 441
442 442 /* Macros used to facilitate sparcv9 instruction generation */
443 443 #define BA_A_INSTR 0x30800000 /* ba,a addr */
444 444 #define DISP22(from, to) \
445 445 ((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)
446 446
447 447 /*ARGSUSED*/
448 448 static void
449 449 brand_plat_interposition_enable(void)
450 450 {
451 451 ASSERT(MUTEX_HELD(&brand_list_lock));
452 452
453 453 /*
454 454 * Before we hot patch the kernel save the current instructions
455 455 * so that we can restore them later.
456 456 */
457 457 syscall_trap_patch_instr_orig =
458 458 *(uint32_t *)syscall_trap_patch_point;
459 459 syscall_trap32_patch_instr_orig =
460 460 *(uint32_t *)syscall_trap32_patch_point;
461 461
462 462 /*
463 463 * Modify the trap table at the patch points.
464 464 *
465 465 * We basically replace the first instruction at the patch
466 466 * point with a ba,a instruction that will transfer control
467 467 * to syscall_wrapper or syscall_wrapper32 for 64-bit and
468 468 * 32-bit syscalls respectively. It's important to note that
469 469 * the annul bit is set in the branch so we don't execute
470 470 * the instruction directly following the one we're patching
471 471 * during the branch's delay slot.
472 472 *
473 473 * It also doesn't matter that we're not atomically updating both
474 474 * the 64 and 32 bit syscall paths at the same time since there's
475 475 * no actual branded processes running on the system yet.
476 476 */
477 477 hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
478 478 BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
479 479 4);
480 480 hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
481 481 BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
482 482 4);
483 483 }
484 484
485 485 /*ARGSUSED*/
486 486 static void
487 487 brand_plat_interposition_disable(void)
488 488 {
489 489 ASSERT(MUTEX_HELD(&brand_list_lock));
490 490
491 491 /*
492 492 * Restore the original instructions at the trap table syscall
493 493 * patch points to disable the brand syscall interposition
494 494 * mechanism.
495 495 */
496 496 hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
497 497 syscall_trap_patch_instr_orig, 4);
498 498 hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
499 499 syscall_trap32_patch_instr_orig, 4);
500 500 }
501 501 #endif /* __sparcv9 */
502 502
503 503 /*
504 504 * The following functions can be shared among kernel brand modules which
505 505 * implement Solaris-derived brands, all of which need to do similar tasks
506 506 * to manage the brand.
507 507 */
508 508
509 509 #if defined(_LP64)
510 510 static void
511 511 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
512 512 {
513 513 bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
514 514 dst->e_type = src->e_type;
515 515 dst->e_machine = src->e_machine;
516 516 dst->e_version = src->e_version;
517 517 dst->e_entry = src->e_entry;
518 518 dst->e_phoff = src->e_phoff;
519 519 dst->e_shoff = src->e_shoff;
520 520 dst->e_flags = src->e_flags;
521 521 dst->e_ehsize = src->e_ehsize;
522 522 dst->e_phentsize = src->e_phentsize;
523 523 dst->e_phnum = src->e_phnum;
524 524 dst->e_shentsize = src->e_shentsize;
525 525 dst->e_shnum = src->e_shnum;
526 526 dst->e_shstrndx = src->e_shstrndx;
527 527 }
528 528 #endif /* _LP64 */
529 529
530 530 /*
531 531 * Return -1 if the cmd was not handled by this function.
532 532 */
533 533 /*ARGSUSED*/
534 534 int
535 535 brand_solaris_cmd(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
536 536 struct brand *pbrand, int brandvers)
537 537 {
538 538 brand_proc_data_t *spd;
539 539 brand_proc_reg_t reg;
540 540 proc_t *p = curproc;
541 541 int err;
542 542
543 543 /*
544 544 * There is one operation that is supported for a native
545 545 * process; B_EXEC_BRAND. This brand operaion is redundant
546 546 * since the kernel assumes a native process doing an exec
547 547 * in a branded zone is going to run a branded processes.
548 548 * hence we don't support this operation.
549 549 */
550 550 if (cmd == B_EXEC_BRAND)
551 551 return (ENOSYS);
552 552
553 553 /* For all other operations this must be a branded process. */
554 554 if (!PROC_IS_BRANDED(p))
555 555 return (ENOSYS);
556 556
557 557 ASSERT(p->p_brand == pbrand);
558 558 ASSERT(p->p_brand_data != NULL);
559 559
560 560 spd = (brand_proc_data_t *)p->p_brand_data;
561 561
562 562 switch ((cmd)) {
563 563 case B_EXEC_NATIVE:
564 564 err = exec_common((char *)arg1, (const char **)arg2,
565 565 (const char **)arg3, EBA_NATIVE);
566 566 return (err);
567 567
568 568 /*
569 569 * Get the address of the user-space system call handler from
570 570 * the user process and attach it to the proc structure.
571 571 */
572 572 case B_REGISTER:
573 573 if (p->p_model == DATAMODEL_NATIVE) {
574 574 if (copyin((void *)arg1, ®, sizeof (reg)) != 0)
575 575 return (EFAULT);
576 576 }
577 577 #if defined(_LP64)
578 578 else {
579 579 brand_common_reg32_t reg32;
580 580
581 581 if (copyin((void *)arg1, ®32, sizeof (reg32)) != 0)
582 582 return (EFAULT);
583 583 reg.sbr_version = reg32.sbr_version;
584 584 reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
585 585 }
586 586 #endif /* _LP64 */
587 587
588 588 if (reg.sbr_version != brandvers)
589 589 return (ENOTSUP);
590 590 spd->spd_handler = reg.sbr_handler;
591 591 return (0);
592 592
593 593 case B_ELFDATA:
594 594 if (p->p_model == DATAMODEL_NATIVE) {
595 595 if (copyout(&spd->spd_elf_data, (void *)arg1,
596 596 sizeof (brand_elf_data_t)) != 0)
597 597 return (EFAULT);
598 598 }
599 599 #if defined(_LP64)
600 600 else {
601 601 brand_elf_data32_t sed32;
602 602
603 603 sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
604 604 sed32.sed_phent = spd->spd_elf_data.sed_phent;
605 605 sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
606 606 sed32.sed_entry = spd->spd_elf_data.sed_entry;
607 607 sed32.sed_base = spd->spd_elf_data.sed_base;
608 608 sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
609 609 sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
610 610 if (copyout(&sed32, (void *)arg1, sizeof (sed32))
611 611 != 0)
612 612 return (EFAULT);
613 613 }
614 614 #endif /* _LP64 */
615 615 return (0);
616 616
617 617 /*
618 618 * The B_TRUSS_POINT subcommand exists so that we can see
619 619 * truss output from interposed system calls that return
620 620 * without first calling any other system call, meaning they
621 621 * would be invisible to truss(1).
622 622 * If the second argument is set non-zero, set errno to that
623 623 * value as well.
624 624 *
625 625 * Common arguments seen with truss are:
626 626 *
627 627 * arg1: syscall number
628 628 * arg2: errno
629 629 */
630 630 case B_TRUSS_POINT:
631 631 return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
632 632 }
633 633
634 634 return (-1);
635 635 }
636 636
637 637 /*ARGSUSED*/
638 638 void
639 639 brand_solaris_copy_procdata(proc_t *child, proc_t *parent, struct brand *pbrand)
640 640 {
641 641 brand_proc_data_t *spd;
642 642
643 643 ASSERT(parent->p_brand == pbrand);
644 644 ASSERT(child->p_brand == pbrand);
645 645 ASSERT(parent->p_brand_data != NULL);
646 646 ASSERT(child->p_brand_data == NULL);
647 647
648 648 /*
649 649 * Just duplicate all the proc data of the parent for the
650 650 * child
651 651 */
652 652 spd = kmem_alloc(sizeof (brand_proc_data_t), KM_SLEEP);
653 653 bcopy(parent->p_brand_data, spd, sizeof (brand_proc_data_t));
654 654 child->p_brand_data = spd;
655 655 }
656 656
657 657 static void
658 658 restoreexecenv(struct execenv *ep, stack_t *sp)
659 659 {
660 660 klwp_t *lwp = ttolwp(curthread);
661 661
662 662 setexecenv(ep);
663 663 lwp->lwp_sigaltstack.ss_sp = sp->ss_sp;
664 664 lwp->lwp_sigaltstack.ss_size = sp->ss_size;
665 665 lwp->lwp_sigaltstack.ss_flags = sp->ss_flags;
666 666 }
667 667
668 668 /*ARGSUSED*/
669 669 int
670 670 brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
671 671 intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
672 672 cred_t *cred, int *brand_action, struct brand *pbrand, char *bname,
673 673 char *brandlib, char *brandlib32)
674 674 {
675 675
676 676 vnode_t *nvp;
677 677 Ehdr ehdr;
678 678 Addr uphdr_vaddr;
679 679 intptr_t voffset;
680 680 char *interp;
681 681 int i, err;
682 682 struct execenv env;
683 683 struct execenv origenv;
684 684 stack_t orig_sigaltstack;
685 685 struct user *up = PTOU(curproc);
686 686 proc_t *p = ttoproc(curthread);
687 687 klwp_t *lwp = ttolwp(curthread);
688 688 brand_proc_data_t *spd;
689 689 brand_elf_data_t sed, *sedp;
690 690 uintptr_t lddata; /* lddata of executable's linker */
691 691
692 692 ASSERT(curproc->p_brand == pbrand);
693 693 ASSERT(curproc->p_brand_data != NULL);
694 694
695 695 spd = (brand_proc_data_t *)curproc->p_brand_data;
696 696 sedp = &spd->spd_elf_data;
697 697
698 698 args->brandname = bname;
699 699
700 700 /*
701 701 * We will exec the brand library and then map in the target
702 702 * application and (optionally) the brand's default linker.
703 703 */
704 704 if (args->to_model == DATAMODEL_NATIVE) {
705 705 args->emulator = brandlib;
706 706 }
707 707 #if defined(_LP64)
708 708 else {
709 709 args->emulator = brandlib32;
710 710 }
711 711 #endif /* _LP64 */
712 712
713 713 if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW,
714 714 NULLVPP, &nvp)) != 0) {
715 715 uprintf("%s: not found.", args->emulator);
716 716 return (err);
717 717 }
718 718
719 719 /*
720 720 * The following elf{32}exec call changes the execenv in the proc
721 721 * struct which includes changing the p_exec member to be the vnode
722 722 * for the brand library (e.g. /.SUNWnative/usr/lib/s10_brand.so.1).
723 723 * We will eventually set the p_exec member to be the vnode for the new
724 724 * executable when we call setexecenv(). However, if we get an error
725 725 * before that call we need to restore the execenv to its original
726 726 * values so that when we return to the caller fop_close() works
727 727 * properly while cleaning up from the failed exec(). Restoring the
728 728 * original value will also properly decrement the 2nd VN_RELE that we
729 729 * took on the brand library.
730 730 */
731 731 origenv.ex_bssbase = p->p_bssbase;
732 732 origenv.ex_brkbase = p->p_brkbase;
733 733 origenv.ex_brksize = p->p_brksize;
734 734 origenv.ex_vp = p->p_exec;
735 735 orig_sigaltstack.ss_sp = lwp->lwp_sigaltstack.ss_sp;
736 736 orig_sigaltstack.ss_size = lwp->lwp_sigaltstack.ss_size;
737 737 orig_sigaltstack.ss_flags = lwp->lwp_sigaltstack.ss_flags;
738 738
739 739 if (args->to_model == DATAMODEL_NATIVE) {
740 740 err = elfexec(nvp, uap, args, idatap, INTP_MAXDEPTH + 1, execsz,
741 741 setid, exec_file, cred, brand_action);
742 742 }
743 743 #if defined(_LP64)
744 744 else {
745 745 err = elf32exec(nvp, uap, args, idatap, INTP_MAXDEPTH + 1,
746 746 execsz, setid, exec_file, cred, brand_action);
747 747 }
748 748 #endif /* _LP64 */
749 749 VN_RELE(nvp);
750 750 if (err != 0) {
751 751 restoreexecenv(&origenv, &orig_sigaltstack);
752 752 return (err);
753 753 }
754 754
755 755 /*
756 756 * The u_auxv vectors are set up by elfexec to point to the
757 757 * brand emulation library and linker. Save these so they can
758 758 * be copied to the specific brand aux vectors.
759 759 */
760 760 bzero(&sed, sizeof (sed));
761 761 for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
762 762 switch (up->u_auxv[i].a_type) {
763 763 case AT_SUN_LDDATA:
764 764 sed.sed_lddata = up->u_auxv[i].a_un.a_val;
765 765 break;
766 766 case AT_BASE:
767 767 sed.sed_base = up->u_auxv[i].a_un.a_val;
768 768 break;
769 769 case AT_ENTRY:
770 770 sed.sed_entry = up->u_auxv[i].a_un.a_val;
771 771 break;
772 772 case AT_PHDR:
773 773 sed.sed_phdr = up->u_auxv[i].a_un.a_val;
774 774 break;
775 775 case AT_PHENT:
776 776 sed.sed_phent = up->u_auxv[i].a_un.a_val;
777 777 break;
778 778 case AT_PHNUM:
779 779 sed.sed_phnum = up->u_auxv[i].a_un.a_val;
780 780 break;
781 781 default:
782 782 break;
783 783 }
784 784 }
785 785 /* Make sure the emulator has an entry point */
786 786 ASSERT(sed.sed_entry != NULL);
787 787 ASSERT(sed.sed_phdr != NULL);
788 788
789 789 bzero(&env, sizeof (env));
790 790 if (args->to_model == DATAMODEL_NATIVE) {
791 791 err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
792 792 &voffset, exec_file, &interp, &env.ex_bssbase,
793 793 &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
794 794 }
795 795 #if defined(_LP64)
796 796 else {
797 797 Elf32_Ehdr ehdr32;
798 798 Elf32_Addr uphdr_vaddr32;
799 799 err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
800 800 &voffset, exec_file, &interp, &env.ex_bssbase,
801 801 &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
802 802 Ehdr32to64(&ehdr32, &ehdr);
803 803
804 804 if (uphdr_vaddr32 == (Elf32_Addr)-1)
805 805 uphdr_vaddr = (Addr)-1;
806 806 else
807 807 uphdr_vaddr = uphdr_vaddr32;
808 808 }
809 809 #endif /* _LP64 */
810 810 if (err != 0) {
811 811 restoreexecenv(&origenv, &orig_sigaltstack);
812 812
813 813 if (interp != NULL)
814 814 kmem_free(interp, MAXPATHLEN);
815 815
816 816 return (err);
817 817 }
818 818
819 819 /*
820 820 * Save off the important properties of the executable. The
821 821 * brand library will ask us for this data later, when it is
822 822 * initializing and getting ready to transfer control to the
823 823 * brand application.
824 824 */
825 825 if (uphdr_vaddr == (Addr)-1)
826 826 sedp->sed_phdr = voffset + ehdr.e_phoff;
827 827 else
828 828 sedp->sed_phdr = voffset + uphdr_vaddr;
829 829 sedp->sed_entry = voffset + ehdr.e_entry;
830 830 sedp->sed_phent = ehdr.e_phentsize;
831 831 sedp->sed_phnum = ehdr.e_phnum;
832 832
833 833 if (interp != NULL) {
834 834 if (ehdr.e_type == ET_DYN) {
835 835 /*
836 836 * This is a shared object executable, so we
837 837 * need to pick a reasonable place to put the
838 838 * heap. Just don't use the first page.
839 839 */
840 840 env.ex_brkbase = (caddr_t)PAGESIZE;
841 841 env.ex_bssbase = (caddr_t)PAGESIZE;
842 842 }
843 843
844 844 /*
845 845 * If the program needs an interpreter (most do), map
846 846 * it in and store relevant information about it in the
847 847 * aux vector, where the brand library can find it.
848 848 */
849 849 if ((err = lookupname(interp, UIO_SYSSPACE,
850 850 FOLLOW, NULLVPP, &nvp)) != 0) {
851 851 uprintf("%s: not found.", interp);
852 852 restoreexecenv(&origenv, &orig_sigaltstack);
853 853 kmem_free(interp, MAXPATHLEN);
854 854 return (err);
855 855 }
856 856
857 857 kmem_free(interp, MAXPATHLEN);
858 858
859 859 if (args->to_model == DATAMODEL_NATIVE) {
860 860 err = mapexec_brand(nvp, args, &ehdr,
861 861 &uphdr_vaddr, &voffset, exec_file, &interp,
862 862 NULL, NULL, NULL, &lddata, NULL);
863 863 }
864 864 #if defined(_LP64)
865 865 else {
866 866 Elf32_Ehdr ehdr32;
867 867 Elf32_Addr uphdr_vaddr32;
868 868 err = mapexec32_brand(nvp, args, &ehdr32,
869 869 &uphdr_vaddr32, &voffset, exec_file, &interp,
870 870 NULL, NULL, NULL, &lddata, NULL);
871 871 Ehdr32to64(&ehdr32, &ehdr);
872 872
873 873 if (uphdr_vaddr32 == (Elf32_Addr)-1)
874 874 uphdr_vaddr = (Addr)-1;
875 875 else
876 876 uphdr_vaddr = uphdr_vaddr32;
877 877 }
878 878 #endif /* _LP64 */
879 879 VN_RELE(nvp);
880 880 if (err != 0) {
881 881 restoreexecenv(&origenv, &orig_sigaltstack);
882 882 return (err);
883 883 }
884 884
885 885 /*
886 886 * Now that we know the base address of the brand's
887 887 * linker, place it in the aux vector.
888 888 */
889 889 sedp->sed_base = voffset;
890 890 sedp->sed_ldentry = voffset + ehdr.e_entry;
891 891 sedp->sed_lddata = voffset + lddata;
892 892 } else {
893 893 /*
894 894 * This program has no interpreter. The brand library
895 895 * will jump to the address in the AT_SUN_BRAND_LDENTRY
896 896 * aux vector, so in this case, put the entry point of
897 897 * the main executable there.
898 898 */
899 899 if (ehdr.e_type == ET_EXEC) {
900 900 /*
901 901 * An executable with no interpreter, this must
902 902 * be a statically linked executable, which
903 903 * means we loaded it at the address specified
904 904 * in the elf header, in which case the e_entry
905 905 * field of the elf header is an absolute
906 906 * address.
907 907 */
908 908 sedp->sed_ldentry = ehdr.e_entry;
909 909 sedp->sed_entry = ehdr.e_entry;
910 910 sedp->sed_lddata = NULL;
911 911 sedp->sed_base = NULL;
912 912 } else {
913 913 /*
914 914 * A shared object with no interpreter, we use
915 915 * the calculated address from above.
916 916 */
917 917 sedp->sed_ldentry = sedp->sed_entry;
918 918 sedp->sed_entry = NULL;
919 919 sedp->sed_phdr = NULL;
920 920 sedp->sed_phent = NULL;
921 921 sedp->sed_phnum = NULL;
922 922 sedp->sed_lddata = NULL;
923 923 sedp->sed_base = voffset;
924 924
925 925 if (ehdr.e_type == ET_DYN) {
926 926 /*
927 927 * Delay setting the brkbase until the
928 928 * first call to brk(); see elfexec()
929 929 * for details.
930 930 */
931 931 env.ex_bssbase = (caddr_t)0;
932 932 env.ex_brkbase = (caddr_t)0;
933 933 env.ex_brksize = 0;
934 934 }
935 935 }
936 936 }
937 937
938 938 env.ex_magic = elfmagic;
939 939 env.ex_vp = vp;
940 940 setexecenv(&env);
941 941
942 942 /*
943 943 * It's time to manipulate the process aux vectors. First
944 944 * we need to update the AT_SUN_AUXFLAGS aux vector to set
945 945 * the AF_SUN_NOPLM flag.
946 946 */
947 947 if (args->to_model == DATAMODEL_NATIVE) {
948 948 auxv_t auxflags_auxv;
949 949
950 950 if (copyin(args->auxp_auxflags, &auxflags_auxv,
951 951 sizeof (auxflags_auxv)) != 0)
952 952 return (EFAULT);
953 953
954 954 ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
955 955 auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
956 956 if (copyout(&auxflags_auxv, args->auxp_auxflags,
957 957 sizeof (auxflags_auxv)) != 0)
958 958 return (EFAULT);
959 959 }
960 960 #if defined(_LP64)
961 961 else {
962 962 auxv32_t auxflags_auxv32;
963 963
964 964 if (copyin(args->auxp_auxflags, &auxflags_auxv32,
965 965 sizeof (auxflags_auxv32)) != 0)
966 966 return (EFAULT);
967 967
968 968 ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
969 969 auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
970 970 if (copyout(&auxflags_auxv32, args->auxp_auxflags,
971 971 sizeof (auxflags_auxv32)) != 0)
972 972 return (EFAULT);
973 973 }
974 974 #endif /* _LP64 */
975 975
976 976 /* Second, copy out the brand specific aux vectors. */
977 977 if (args->to_model == DATAMODEL_NATIVE) {
978 978 auxv_t brand_auxv[] = {
979 979 { AT_SUN_BRAND_AUX1, 0 },
980 980 { AT_SUN_BRAND_AUX2, 0 },
981 981 { AT_SUN_BRAND_AUX3, 0 }
982 982 };
983 983
984 984 ASSERT(brand_auxv[0].a_type ==
985 985 AT_SUN_BRAND_COMMON_LDDATA);
986 986 brand_auxv[0].a_un.a_val = sed.sed_lddata;
987 987
988 988 if (copyout(&brand_auxv, args->auxp_brand,
989 989 sizeof (brand_auxv)) != 0)
990 990 return (EFAULT);
991 991 }
992 992 #if defined(_LP64)
993 993 else {
994 994 auxv32_t brand_auxv32[] = {
995 995 { AT_SUN_BRAND_AUX1, 0 },
996 996 { AT_SUN_BRAND_AUX2, 0 },
997 997 { AT_SUN_BRAND_AUX3, 0 }
998 998 };
999 999
1000 1000 ASSERT(brand_auxv32[0].a_type == AT_SUN_BRAND_COMMON_LDDATA);
1001 1001 brand_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
1002 1002 if (copyout(&brand_auxv32, args->auxp_brand,
1003 1003 sizeof (brand_auxv32)) != 0)
1004 1004 return (EFAULT);
1005 1005 }
1006 1006 #endif /* _LP64 */
1007 1007
1008 1008 /*
1009 1009 * Third, the /proc aux vectors set up by elfexec() point to
1010 1010 * brand emulation library and its linker. Copy these to the
1011 1011 * /proc brand specific aux vector, and update the regular
1012 1012 * /proc aux vectors to point to the executable (and its
1013 1013 * linker). This will enable debuggers to access the
1014 1014 * executable via the usual /proc or elf notes aux vectors.
1015 1015 *
1016 1016 * The brand emulation library's linker will get its aux
1017 1017 * vectors off the stack, and then update the stack with the
1018 1018 * executable's aux vectors before jumping to the executable's
1019 1019 * linker.
1020 1020 *
1021 1021 * Debugging the brand emulation library must be done from
1022 1022 * the global zone, where the librtld_db module knows how to
1023 1023 * fetch the brand specific aux vectors to access the brand
1024 1024 * emulation libraries linker.
1025 1025 */
1026 1026 for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
1027 1027 ulong_t val;
1028 1028
1029 1029 switch (up->u_auxv[i].a_type) {
1030 1030 case AT_SUN_BRAND_COMMON_LDDATA:
1031 1031 up->u_auxv[i].a_un.a_val = sed.sed_lddata;
1032 1032 continue;
1033 1033 case AT_BASE:
1034 1034 val = sedp->sed_base;
1035 1035 break;
1036 1036 case AT_ENTRY:
1037 1037 val = sedp->sed_entry;
1038 1038 break;
1039 1039 case AT_PHDR:
1040 1040 val = sedp->sed_phdr;
1041 1041 break;
1042 1042 case AT_PHENT:
1043 1043 val = sedp->sed_phent;
1044 1044 break;
1045 1045 case AT_PHNUM:
1046 1046 val = sedp->sed_phnum;
1047 1047 break;
1048 1048 case AT_SUN_LDDATA:
1049 1049 val = sedp->sed_lddata;
1050 1050 break;
1051 1051 default:
1052 1052 continue;
1053 1053 }
1054 1054
1055 1055 up->u_auxv[i].a_un.a_val = val;
1056 1056 if (val == NULL) {
1057 1057 /* Hide the entry for static binaries */
1058 1058 up->u_auxv[i].a_type = AT_IGNORE;
1059 1059 }
1060 1060 }
1061 1061
1062 1062 /*
1063 1063 * The last thing we do here is clear spd->spd_handler. This
1064 1064 * is important because if we're already a branded process and
1065 1065 * if this exec succeeds, there is a window between when the
1066 1066 * exec() first returns to the userland of the new process and
1067 1067 * when our brand library gets initialized, during which we
1068 1068 * don't want system calls to be re-directed to our brand
1069 1069 * library since it hasn't been initialized yet.
1070 1070 */
1071 1071 spd->spd_handler = NULL;
1072 1072
1073 1073 return (0);
1074 1074 }
1075 1075
1076 1076 void
1077 1077 brand_solaris_exec(struct brand *pbrand)
1078 1078 {
1079 1079 brand_proc_data_t *spd = curproc->p_brand_data;
1080 1080
1081 1081 ASSERT(curproc->p_brand == pbrand);
1082 1082 ASSERT(curproc->p_brand_data != NULL);
1083 1083 ASSERT(ttolwp(curthread)->lwp_brand != NULL);
1084 1084
1085 1085 /*
1086 1086 * We should only be called from exec(), when we know the process
1087 1087 * is single-threaded.
1088 1088 */
1089 1089 ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
1090 1090
1091 1091 /* Upon exec, reset our lwp brand data. */
1092 1092 (void) brand_solaris_freelwp(ttolwp(curthread), pbrand);
1093 1093 (void) brand_solaris_initlwp(ttolwp(curthread), pbrand);
1094 1094
1095 1095 /*
1096 1096 * Upon exec, reset all the proc brand data, except for the elf
1097 1097 * data associated with the executable we are exec'ing.
1098 1098 */
1099 1099 spd->spd_handler = NULL;
1100 1100 }
1101 1101
1102 1102 int
1103 1103 brand_solaris_fini(char **emul_table, struct modlinkage *modlinkage,
1104 1104 struct brand *pbrand)
1105 1105 {
1106 1106 int err;
1107 1107
1108 1108 /*
1109 1109 * If there are any zones using this brand, we can't allow it
1110 1110 * to be unloaded.
1111 1111 */
1112 1112 if (brand_zone_count(pbrand))
1113 1113 return (EBUSY);
1114 1114
1115 1115 kmem_free(*emul_table, NSYSCALL);
1116 1116 *emul_table = NULL;
1117 1117
1118 1118 err = mod_remove(modlinkage);
1119 1119 if (err)
1120 1120 cmn_err(CE_WARN, "Couldn't unload brand module");
1121 1121
1122 1122 return (err);
1123 1123 }
1124 1124
/*
 * Common forklwp hook for Solaris-derived brands.  p is the parent lwp and
 * c the child lwp.  No state is copied or modified here; the function only
 * asserts that both lwps belong to processes of brand pbrand, that those
 * processes have brand data, and that each lwp's lwp_brand is non-NULL.
 */
/*ARGSUSED*/
void
brand_solaris_forklwp(klwp_t *p, klwp_t *c, struct brand *pbrand)
{
	ASSERT(p->lwp_procp->p_brand == pbrand);
	ASSERT(c->lwp_procp->p_brand == pbrand);

	ASSERT(p->lwp_procp->p_brand_data != NULL);
	ASSERT(c->lwp_procp->p_brand_data != NULL);

	/*
	 * Both LWPs have already been initialized via
	 * brand_solaris_initlwp().
	 */
	ASSERT(p->lwp_brand != NULL);
	ASSERT(c->lwp_brand != NULL);
}
1142 1142
/*
 * Common freelwp hook for Solaris-derived brands.  Clears the lwp's
 * lwp_brand field.  The lwp must belong to a process of brand pbrand that
 * has brand data, and lwp_brand must currently be non-NULL.
 */
/*ARGSUSED*/
void
brand_solaris_freelwp(klwp_t *l, struct brand *pbrand)
{
	ASSERT(l->lwp_procp->p_brand == pbrand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand != NULL);
	/* Nothing is freed here; lwp_brand is simply reset to NULL. */
	l->lwp_brand = NULL;
}
1152 1152
/*
 * Common initlwp hook for Solaris-derived brands.  Marks the lwp as
 * brand-initialized by storing a non-NULL placeholder in lwp_brand.  The
 * lwp must belong to a process of brand pbrand that has brand data, and
 * lwp_brand must currently be NULL.
 */
/*ARGSUSED*/
void
brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
{
	ASSERT(l->lwp_procp->p_brand == pbrand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand == NULL);
	/*
	 * No per-lwp structure is allocated; (void *)-1 is just a non-NULL
	 * marker that satisfies the lwp_brand != NULL assertions made by
	 * the other lwp hooks in this file.
	 */
	l->lwp_brand = (void *)-1;
}
1162 1162
/*
 * Common lwpexit hook for Solaris-derived brands.  Performs no work beyond
 * asserting that the lwp belongs to a process of brand pbrand, that the
 * process has brand data, and that the lwp's lwp_brand is non-NULL.
 */
/*ARGSUSED*/
void
brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
{
	ASSERT(l->lwp_procp->p_brand == pbrand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand != NULL);
}
1171 1171
1172 1172 /*ARGSUSED*/
1173 1173 void
1174 1174 brand_solaris_proc_exit(struct proc *p, struct brand *pbrand)
1175 1175 {
1176 1176 ASSERT(p->p_brand == pbrand);
1177 1177 ASSERT(p->p_brand_data != NULL);
1178 1178
1179 1179 /* upon exit, free our proc brand data */
1180 1180 kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
1181 1181 p->p_brand_data = NULL;
1182 1182 }
1183 1183
1184 1184 void
1185 1185 brand_solaris_setbrand(proc_t *p, struct brand *pbrand)
1186 1186 {
1187 1187 ASSERT(p->p_brand == pbrand);
1188 1188 ASSERT(p->p_brand_data == NULL);
1189 1189
1190 1190 /*
1191 1191 * We should only be called from exec(), when we know the process
1192 1192 * is single-threaded.
1193 1193 */
1194 1194 ASSERT(p->p_tlist == p->p_tlist->t_forw);
1195 1195
1196 1196 p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP);
1197 1197 }
|
↓ open down ↓ |
1197 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX