1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2015, Joyent, Inc. All rights reserved.
  24  */
  25 
  26 #include <sys/kmem.h>
  27 #include <sys/errno.h>
  28 #include <sys/systm.h>
  29 #include <sys/cmn_err.h>
  30 #include <sys/brand.h>
  31 #include <sys/machbrand.h>
  32 #include <sys/modctl.h>
  33 #include <sys/rwlock.h>
  34 #include <sys/zone.h>
  35 #include <sys/pathname.h>
  36 
  37 #define SUPPORTED_BRAND_VERSION BRAND_VER_1
  38 
  39 #if defined(__sparcv9)
  40 /* sparcv9 uses system wide brand interposition hooks */
  41 static void brand_plat_interposition_enable(void);
  42 static void brand_plat_interposition_disable(void);
  43 
  44 struct brand_mach_ops native_mach_ops  = {
  45                 NULL, NULL
  46 };
  47 #else /* !__sparcv9 */
  48 struct brand_mach_ops native_mach_ops  = {
  49                 NULL, NULL, NULL, NULL, NULL, NULL, NULL
  50 };
  51 #endif /* !__sparcv9 */
  52 
  53 brand_t native_brand = {
  54                 BRAND_VER_1,
  55                 "native",
  56                 NULL,
  57                 &native_mach_ops,
  58                 0
  59 };
  60 
  61 /*
  62  * Used to maintain a list of all the brands currently loaded into the
  63  * kernel.
  64  */
  65 struct brand_list {
  66         int                     bl_refcnt;
  67         struct brand_list       *bl_next;
  68         brand_t                 *bl_brand;
  69 };
  70 
  71 static struct brand_list *brand_list = NULL;
  72 
  73 /*
  74  * This lock protects the integrity of the brand list.
  75  */
  76 static kmutex_t brand_list_lock;
  77 
  78 void
  79 brand_init()
  80 {
  81         mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
  82         p0.p_brand = &native_brand;
  83 }
  84 
  85 int
  86 brand_register(brand_t *brand)
  87 {
  88         struct brand_list *list, *scan;
  89 
  90         if (brand == NULL)
  91                 return (EINVAL);
  92 
  93         if (brand->b_version != SUPPORTED_BRAND_VERSION) {
  94                 if (brand->b_version < SUPPORTED_BRAND_VERSION) {
  95                         cmn_err(CE_WARN,
  96                             "brand '%s' was built to run on older versions "
  97                             "of Solaris.",
  98                             brand->b_name);
  99                 } else {
 100                         cmn_err(CE_WARN,
 101                             "brand '%s' was built to run on a newer version "
 102                             "of Solaris.",
 103                             brand->b_name);
 104                 }
 105                 return (EINVAL);
 106         }
 107 
 108         /* Sanity checks */
 109         if (brand->b_name == NULL || brand->b_ops == NULL ||
 110             brand->b_ops->b_brandsys == NULL) {
 111                 cmn_err(CE_WARN, "Malformed brand");
 112                 return (EINVAL);
 113         }
 114 
 115         list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
 116 
 117         /* Add the brand to the list of loaded brands. */
 118         mutex_enter(&brand_list_lock);
 119 
 120         /*
 121          * Check to be sure we haven't already registered this brand.
 122          */
 123         for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
 124                 if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
 125                         cmn_err(CE_WARN,
 126                             "Invalid attempt to load a second instance of "
 127                             "brand %s", brand->b_name);
 128                         mutex_exit(&brand_list_lock);
 129                         kmem_free(list, sizeof (struct brand_list));
 130                         return (EINVAL);
 131                 }
 132         }
 133 
 134 #if defined(__sparcv9)
 135         /* sparcv9 uses system wide brand interposition hooks */
 136         if (brand_list == NULL)
 137                 brand_plat_interposition_enable();
 138 #endif /* __sparcv9 */
 139 
 140         list->bl_brand = brand;
 141         list->bl_refcnt = 0;
 142         list->bl_next = brand_list;
 143         brand_list = list;
 144 
 145         mutex_exit(&brand_list_lock);
 146 
 147         return (0);
 148 }
 149 
 150 /*
 151  * The kernel module implementing this brand is being unloaded, so remove
 152  * it from the list of active brands.
 153  */
 154 int
 155 brand_unregister(brand_t *brand)
 156 {
 157         struct brand_list *list, *prev;
 158 
 159         /* Sanity checks */
 160         if (brand == NULL || brand->b_name == NULL) {
 161                 cmn_err(CE_WARN, "Malformed brand");
 162                 return (EINVAL);
 163         }
 164 
 165         prev = NULL;
 166         mutex_enter(&brand_list_lock);
 167 
 168         for (list = brand_list; list != NULL; list = list->bl_next) {
 169                 if (list->bl_brand == brand)
 170                         break;
 171                 prev = list;
 172         }
 173 
 174         if (list == NULL) {
 175                 cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
 176                 mutex_exit(&brand_list_lock);
 177                 return (EINVAL);
 178         }
 179 
 180         if (list->bl_refcnt > 0) {
 181                 cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
 182                     brand->b_name);
 183                 mutex_exit(&brand_list_lock);
 184                 return (EBUSY);
 185         }
 186 
 187         /* Remove brand from the list */
 188         if (prev != NULL)
 189                 prev->bl_next = list->bl_next;
 190         else
 191                 brand_list = list->bl_next;
 192 
 193 #if defined(__sparcv9)
 194         /* sparcv9 uses system wide brand interposition hooks */
 195         if (brand_list == NULL)
 196                 brand_plat_interposition_disable();
 197 #endif /* __sparcv9 */
 198 
 199         mutex_exit(&brand_list_lock);
 200 
 201         kmem_free(list, sizeof (struct brand_list));
 202 
 203         return (0);
 204 }
 205 
 206 /*
 207  * Record that a zone of this brand has been instantiated.  If the kernel
 208  * module implementing this brand's functionality is not present, this
 209  * routine attempts to load the module as a side effect.
 210  */
 211 brand_t *
 212 brand_register_zone(struct brand_attr *attr)
 213 {
 214         struct brand_list *l = NULL;
 215         ddi_modhandle_t hdl = NULL;
 216         char *modname;
 217         int err = 0;
 218 
 219         if (is_system_labeled()) {
 220                 cmn_err(CE_WARN,
 221                     "Branded zones are not allowed on labeled systems.");
 222                 return (NULL);
 223         }
 224 
 225         /*
 226          * We make at most two passes through this loop.  The first time
 227          * through, we're looking to see if this is a new user of an
 228          * already loaded brand.  If the brand hasn't been loaded, we
 229          * call ddi_modopen() to force it to be loaded and then make a
 230          * second pass through the list of brands.  If we don't find the
 231          * brand the second time through it means that the modname
 232          * specified in the brand_attr structure doesn't provide the brand
 233          * specified in the brandname field.  This would suggest a bug in
 234          * the brand's config.xml file.  We close the module and return
 235          * 'NULL' to the caller.
 236          */
 237         for (;;) {
 238                 /*
 239                  * Search list of loaded brands
 240                  */
 241                 mutex_enter(&brand_list_lock);
 242                 for (l = brand_list; l != NULL; l = l->bl_next)
 243                         if (strcmp(attr->ba_brandname,
 244                             l->bl_brand->b_name) == 0)
 245                                 break;
 246                 if ((l != NULL) || (hdl != NULL))
 247                         break;
 248                 mutex_exit(&brand_list_lock);
 249 
 250                 /*
 251                  * We didn't find that the requested brand has been loaded
 252                  * yet, so we trigger the load of the appropriate kernel
 253                  * module and search the list again.
 254                  */
 255                 modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 256                 (void) strcpy(modname, "brand/");
 257                 (void) strcat(modname, attr->ba_modname);
 258                 hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
 259                 kmem_free(modname, MAXPATHLEN);
 260 
 261                 if (err != 0)
 262                         return (NULL);
 263         }
 264 
 265         /*
 266          * If we found the matching brand, bump its reference count.
 267          */
 268         if (l != NULL)
 269                 l->bl_refcnt++;
 270 
 271         mutex_exit(&brand_list_lock);
 272 
 273         if (hdl != NULL)
 274                 (void) ddi_modclose(hdl);
 275 
 276         return ((l != NULL) ? l->bl_brand : NULL);
 277 }
 278 
 279 /*
 280  * Return the number of zones currently using this brand.
 281  */
 282 int
 283 brand_zone_count(struct brand *bp)
 284 {
 285         struct brand_list *l;
 286         int cnt = 0;
 287 
 288         mutex_enter(&brand_list_lock);
 289         for (l = brand_list; l != NULL; l = l->bl_next)
 290                 if (l->bl_brand == bp) {
 291                         cnt = l->bl_refcnt;
 292                         break;
 293                 }
 294         mutex_exit(&brand_list_lock);
 295 
 296         return (cnt);
 297 }
 298 
 299 void
 300 brand_unregister_zone(struct brand *bp)
 301 {
 302         struct brand_list *list;
 303 
 304         mutex_enter(&brand_list_lock);
 305         for (list = brand_list; list != NULL; list = list->bl_next) {
 306                 if (list->bl_brand == bp) {
 307                         ASSERT(list->bl_refcnt > 0);
 308                         list->bl_refcnt--;
 309                         break;
 310                 }
 311         }
 312         mutex_exit(&brand_list_lock);
 313 }
 314 
 315 int
 316 brand_setbrand(proc_t *p, boolean_t lwps_ok)
 317 {
 318         brand_t *bp = p->p_zone->zone_brand;
 319         void *brand_data = NULL;
 320 
 321         VERIFY(MUTEX_NOT_HELD(&p->p_lock));
 322         VERIFY(bp != NULL);
 323 
 324         /*
 325          * Process branding occurs during fork() and exec().  When it happens
 326          * during fork(), the LWP count will always be 0 since branding is
 327          * performed as part of getproc(), before LWPs have been associated.
 328          * The same is not true during exec(), where a multi-LWP process may
 329          * undergo branding just prior to gexec(). This is to ensure
 330          * exec-related brand hooks are available.  While it may seem
 331          * complicated to brand a multi-LWP process, the two possible outcomes
 332          * simplify things:
 333          *
 334          * 1. The exec() succeeds:  LWPs besides the caller will be killed and
 335          *    any further branding will occur in a single-LWP context.
 336          * 2. The exec() fails: The process will be promptly unbranded since
 337          *    the hooks are no longer needed.
 338          *
 339          * To prevent inconsistent brand state from being encountered during
 340          * the exec(), LWPs beyond the caller which are associated with this
 341          * process must be held temporarily.  They will be released either when
 342          * they are killed in the exec() success, or when the brand is cleared
 343          * after exec() failure.
 344          */
 345         if (lwps_ok) {
 346                 /*
 347                  * We've been called from a exec() context tolerating the
 348                  * existence of multiple LWPs during branding is necessary.
 349                  */
 350                 VERIFY(p == curproc);
 351                 VERIFY(p->p_tlist != NULL);
 352 
 353                 if (p->p_tlist != p->p_tlist->t_forw) {
 354                         /*
 355                          * Multiple LWPs are present.  Hold all but the caller.
 356                          */
 357                         if (!holdlwps(SHOLDFORK1)) {
 358                                 return (-1);
 359                         }
 360                 }
 361         } else {
 362                 /*
 363                  * Processes branded during fork() should not have LWPs at all.
 364                  */
 365                 VERIFY(p->p_tlist == NULL);
 366         }
 367 
 368         if (bp->b_data_size > 0) {
 369                 brand_data = kmem_zalloc(bp->b_data_size, KM_SLEEP);
 370         }
 371 
 372         mutex_enter(&p->p_lock);
 373         ASSERT(!PROC_IS_BRANDED(p));
 374         p->p_brand = bp;
 375         p->p_brand_data = brand_data;
 376         ASSERT(PROC_IS_BRANDED(p));
 377         BROP(p)->b_setbrand(p);
 378         mutex_exit(&p->p_lock);
 379         return (0);
 380 }
 381 
 382 void
 383 brand_clearbrand(proc_t *p, boolean_t lwps_ok)
 384 {
 385         brand_t *bp = p->p_zone->zone_brand;
 386         void *brand_data;
 387 
 388         VERIFY(MUTEX_NOT_HELD(&p->p_lock));
 389         VERIFY(bp != NULL);
 390         VERIFY(PROC_IS_BRANDED(p));
 391 
 392         mutex_enter(&p->p_lock);
 393         p->p_brand = &native_brand;
 394         brand_data = p->p_brand_data;
 395         p->p_brand_data = NULL;
 396 
 397         if (lwps_ok) {
 398                 VERIFY(p == curproc);
 399                 /*
 400                  * A process with multiple LWPs is being de-branded after
 401                  * failing an exec.  The other LWPs were held as part of the
 402                  * procedure, so they must be resumed now.
 403                  */
 404                 if (p->p_tlist != NULL && p->p_tlist != p->p_tlist->t_forw) {
 405                         continuelwps(p);
 406                 }
 407         } else {
 408                 /*
 409                  * While clearing the brand, it's ok for one LWP to be present.
 410                  * This happens when a native binary is executed inside a
 411                  * branded zone, since the brand will be removed during the
 412                  * course of a successful exec.
 413                  */
 414                 VERIFY(p->p_tlist == NULL || p->p_tlist == p->p_tlist->t_forw);
 415         }
 416         mutex_exit(&p->p_lock);
 417 
 418         if (brand_data != NULL) {
 419                 kmem_free(brand_data, bp->b_data_size);
 420         }
 421 }
 422 
 423 #if defined(__sparcv9)
 424 /*
 425  * Currently, only sparc has system level brand syscall interposition.
 426  * On x86 we're able to enable syscall interposition on a per-cpu basis
 427  * when a branded thread is scheduled to run on a cpu.
 428  */
 429 
 430 /* Local variables needed for dynamic syscall interposition support */
 431 static uint32_t syscall_trap_patch_instr_orig;
 432 static uint32_t syscall_trap32_patch_instr_orig;
 433 
 434 /* Trap Table syscall entry hot patch points */
 435 extern void     syscall_trap_patch_point(void);
 436 extern void     syscall_trap32_patch_point(void);
 437 
 438 /* Alternate syscall entry handlers used when branded zones are running */
 439 extern void     syscall_wrapper(void);
 440 extern void     syscall_wrapper32(void);
 441 
 442 /* Macros used to facilitate sparcv9 instruction generation */
 443 #define BA_A_INSTR      0x30800000      /* ba,a addr */
 444 #define DISP22(from, to) \
 445         ((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)
 446 
 447 /*ARGSUSED*/
 448 static void
 449 brand_plat_interposition_enable(void)
 450 {
 451         ASSERT(MUTEX_HELD(&brand_list_lock));
 452 
 453         /*
 454          * Before we hot patch the kernel save the current instructions
 455          * so that we can restore them later.
 456          */
 457         syscall_trap_patch_instr_orig =
 458             *(uint32_t *)syscall_trap_patch_point;
 459         syscall_trap32_patch_instr_orig =
 460             *(uint32_t *)syscall_trap32_patch_point;
 461 
 462         /*
 463          * Modify the trap table at the patch points.
 464          *
 465          * We basically replace the first instruction at the patch
 466          * point with a ba,a instruction that will transfer control
 467          * to syscall_wrapper or syscall_wrapper32 for 64-bit and
 468          * 32-bit syscalls respectively.  It's important to note that
 469          * the annul bit is set in the branch so we don't execute
 470          * the instruction directly following the one we're patching
 471          * during the branch's delay slot.
 472          *
 473          * It also doesn't matter that we're not atomically updating both
 474          * the 64 and 32 bit syscall paths at the same time since there's
 475          * no actual branded processes running on the system yet.
 476          */
 477         hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
 478             BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
 479             4);
 480         hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
 481             BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
 482             4);
 483 }
 484 
 485 /*ARGSUSED*/
 486 static void
 487 brand_plat_interposition_disable(void)
 488 {
 489         ASSERT(MUTEX_HELD(&brand_list_lock));
 490 
 491         /*
 492          * Restore the original instructions at the trap table syscall
 493          * patch points to disable the brand syscall interposition
 494          * mechanism.
 495          */
 496         hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
 497             syscall_trap_patch_instr_orig, 4);
 498         hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
 499             syscall_trap32_patch_instr_orig, 4);
 500 }
 501 #endif /* __sparcv9 */
 502 
 503 /*
 504  * The following functions can be shared among kernel brand modules which
 505  * implement Solaris-derived brands, all of which need to do similar tasks
 506  * to manage the brand.
 507  */
 508 
 509 #if defined(_LP64)
 510 static void
 511 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
 512 {
 513         bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
 514         dst->e_type =                src->e_type;
 515         dst->e_machine =     src->e_machine;
 516         dst->e_version =     src->e_version;
 517         dst->e_entry =               src->e_entry;
 518         dst->e_phoff =               src->e_phoff;
 519         dst->e_shoff =               src->e_shoff;
 520         dst->e_flags =               src->e_flags;
 521         dst->e_ehsize =              src->e_ehsize;
 522         dst->e_phentsize =   src->e_phentsize;
 523         dst->e_phnum =               src->e_phnum;
 524         dst->e_shentsize =   src->e_shentsize;
 525         dst->e_shnum =               src->e_shnum;
 526         dst->e_shstrndx =    src->e_shstrndx;
 527 }
 528 #endif /* _LP64 */
 529 
 530 /*
 531  * Return -1 if the cmd was not handled by this function.
 532  */
 533 /*ARGSUSED*/
 534 int
 535 brand_solaris_cmd(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
 536     struct brand *pbrand, int brandvers)
 537 {
 538         brand_proc_data_t       *spd;
 539         brand_proc_reg_t        reg;
 540         proc_t                  *p = curproc;
 541         int                     err;
 542 
 543         /*
 544          * There is one operation that is supported for a native
 545          * process; B_EXEC_BRAND.  This brand operaion is redundant
 546          * since the kernel assumes a native process doing an exec
 547          * in a branded zone is going to run a branded processes.
 548          * hence we don't support this operation.
 549          */
 550         if (cmd == B_EXEC_BRAND)
 551                 return (ENOSYS);
 552 
 553         /* For all other operations this must be a branded process. */
 554         if (!PROC_IS_BRANDED(p))
 555                 return (ENOSYS);
 556 
 557         ASSERT(p->p_brand == pbrand);
 558         ASSERT(p->p_brand_data != NULL);
 559 
 560         spd = (brand_proc_data_t *)p->p_brand_data;
 561 
 562         switch ((cmd)) {
 563         case B_EXEC_NATIVE:
 564                 err = exec_common((char *)arg1, (const char **)arg2,
 565                     (const char **)arg3, EBA_NATIVE);
 566                 return (err);
 567 
 568         /*
 569          * Get the address of the user-space system call handler from
 570          * the user process and attach it to the proc structure.
 571          */
 572         case B_REGISTER:
 573                 if (p->p_model == DATAMODEL_NATIVE) {
 574                         if (copyin((void *)arg1, ®, sizeof (reg)) != 0)
 575                                 return (EFAULT);
 576                 }
 577 #if defined(_LP64)
 578                 else {
 579                         brand_common_reg32_t reg32;
 580 
 581                         if (copyin((void *)arg1, ®32, sizeof (reg32)) != 0)
 582                                 return (EFAULT);
 583                         reg.sbr_version = reg32.sbr_version;
 584                         reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
 585                 }
 586 #endif /* _LP64 */
 587 
 588                 if (reg.sbr_version != brandvers)
 589                         return (ENOTSUP);
 590                 spd->spd_handler = reg.sbr_handler;
 591                 return (0);
 592 
 593         case B_ELFDATA:
 594                 if (p->p_model == DATAMODEL_NATIVE) {
 595                         if (copyout(&spd->spd_elf_data, (void *)arg1,
 596                             sizeof (brand_elf_data_t)) != 0)
 597                                 return (EFAULT);
 598                 }
 599 #if defined(_LP64)
 600                 else {
 601                         brand_elf_data32_t sed32;
 602 
 603                         sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
 604                         sed32.sed_phent = spd->spd_elf_data.sed_phent;
 605                         sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
 606                         sed32.sed_entry = spd->spd_elf_data.sed_entry;
 607                         sed32.sed_base = spd->spd_elf_data.sed_base;
 608                         sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
 609                         sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
 610                         if (copyout(&sed32, (void *)arg1, sizeof (sed32))
 611                             != 0)
 612                                 return (EFAULT);
 613                 }
 614 #endif /* _LP64 */
 615                 return (0);
 616 
 617         /*
 618          * The B_TRUSS_POINT subcommand exists so that we can see
 619          * truss output from interposed system calls that return
 620          * without first calling any other system call, meaning they
 621          * would be invisible to truss(1).
 622          * If the second argument is set non-zero, set errno to that
 623          * value as well.
 624          *
 625          * Common arguments seen with truss are:
 626          *
 627          *      arg1: syscall number
 628          *      arg2: errno
 629          */
 630         case B_TRUSS_POINT:
 631                 return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
 632         }
 633 
 634         return (-1);
 635 }
 636 
 637 /*ARGSUSED*/
 638 void
 639 brand_solaris_copy_procdata(proc_t *child, proc_t *parent, struct brand *pbrand)
 640 {
 641         brand_proc_data_t       *spd;
 642 
 643         ASSERT(parent->p_brand == pbrand);
 644         ASSERT(child->p_brand == pbrand);
 645         ASSERT(parent->p_brand_data != NULL);
 646         ASSERT(child->p_brand_data == NULL);
 647 
 648         /*
 649          * Just duplicate all the proc data of the parent for the
 650          * child
 651          */
 652         spd = kmem_alloc(sizeof (brand_proc_data_t), KM_SLEEP);
 653         bcopy(parent->p_brand_data, spd, sizeof (brand_proc_data_t));
 654         child->p_brand_data = spd;
 655 }
 656 
 657 static void
 658 restoreexecenv(struct execenv *ep, stack_t *sp)
 659 {
 660         klwp_t *lwp = ttolwp(curthread);
 661 
 662         setexecenv(ep);
 663         lwp->lwp_sigaltstack.ss_sp = sp->ss_sp;
 664         lwp->lwp_sigaltstack.ss_size = sp->ss_size;
 665         lwp->lwp_sigaltstack.ss_flags = sp->ss_flags;
 666 }
 667 
 668 /*ARGSUSED*/
 669 int
 670 brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
 671     intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
 672     cred_t *cred, int *brand_action, struct brand *pbrand, char *bname,
 673     char *brandlib, char *brandlib32)
 674 {
 675 
 676         vnode_t         *nvp;
 677         Ehdr            ehdr;
 678         Addr            uphdr_vaddr;
 679         intptr_t        voffset;
 680         char            *interp;
 681         int             i, err;
 682         struct execenv  env;
 683         struct execenv  origenv;
 684         stack_t         orig_sigaltstack;
 685         struct user     *up = PTOU(curproc);
 686         proc_t          *p = ttoproc(curthread);
 687         klwp_t          *lwp = ttolwp(curthread);
 688         brand_proc_data_t       *spd;
 689         brand_elf_data_t sed, *sedp;
 690         uintptr_t       lddata; /* lddata of executable's linker */
 691 
 692         ASSERT(curproc->p_brand == pbrand);
 693         ASSERT(curproc->p_brand_data != NULL);
 694 
 695         spd = (brand_proc_data_t *)curproc->p_brand_data;
 696         sedp = &spd->spd_elf_data;
 697 
 698         args->brandname = bname;
 699 
 700         /*
 701          * We will exec the brand library and then map in the target
 702          * application and (optionally) the brand's default linker.
 703          */
 704         if (args->to_model == DATAMODEL_NATIVE) {
 705                 args->emulator = brandlib;
 706         }
 707 #if defined(_LP64)
 708         else {
 709                 args->emulator = brandlib32;
 710         }
 711 #endif  /* _LP64 */
 712 
 713         if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW,
 714             NULLVPP, &nvp)) != 0) {
 715                 uprintf("%s: not found.", args->emulator);
 716                 return (err);
 717         }
 718 
 719         /*
 720          * The following elf{32}exec call changes the execenv in the proc
 721          * struct which includes changing the p_exec member to be the vnode
 722          * for the brand library (e.g. /.SUNWnative/usr/lib/s10_brand.so.1).
 723          * We will eventually set the p_exec member to be the vnode for the new
 724          * executable when we call setexecenv().  However, if we get an error
 725          * before that call we need to restore the execenv to its original
 726          * values so that when we return to the caller fop_close() works
 727          * properly while cleaning up from the failed exec().  Restoring the
 728          * original value will also properly decrement the 2nd VN_RELE that we
 729          * took on the brand library.
 730          */
 731         origenv.ex_bssbase = p->p_bssbase;
 732         origenv.ex_brkbase = p->p_brkbase;
 733         origenv.ex_brksize = p->p_brksize;
 734         origenv.ex_vp = p->p_exec;
 735         orig_sigaltstack.ss_sp = lwp->lwp_sigaltstack.ss_sp;
 736         orig_sigaltstack.ss_size = lwp->lwp_sigaltstack.ss_size;
 737         orig_sigaltstack.ss_flags = lwp->lwp_sigaltstack.ss_flags;
 738 
 739         if (args->to_model == DATAMODEL_NATIVE) {
 740                 err = elfexec(nvp, uap, args, idatap, INTP_MAXDEPTH + 1, execsz,
 741                     setid, exec_file, cred, brand_action);
 742         }
 743 #if defined(_LP64)
 744         else {
 745                 err = elf32exec(nvp, uap, args, idatap, INTP_MAXDEPTH + 1,
 746                     execsz, setid, exec_file, cred, brand_action);
 747         }
 748 #endif  /* _LP64 */
 749         VN_RELE(nvp);
 750         if (err != 0) {
 751                 restoreexecenv(&origenv, &orig_sigaltstack);
 752                 return (err);
 753         }
 754 
 755         /*
         * The u_auxv vectors are set up by elfexec to point to the
 757          * brand emulation library and linker.  Save these so they can
 758          * be copied to the specific brand aux vectors.
 759          */
 760         bzero(&sed, sizeof (sed));
 761         for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
 762                 switch (up->u_auxv[i].a_type) {
 763                 case AT_SUN_LDDATA:
 764                         sed.sed_lddata = up->u_auxv[i].a_un.a_val;
 765                         break;
 766                 case AT_BASE:
 767                         sed.sed_base = up->u_auxv[i].a_un.a_val;
 768                         break;
 769                 case AT_ENTRY:
 770                         sed.sed_entry = up->u_auxv[i].a_un.a_val;
 771                         break;
 772                 case AT_PHDR:
 773                         sed.sed_phdr = up->u_auxv[i].a_un.a_val;
 774                         break;
 775                 case AT_PHENT:
 776                         sed.sed_phent = up->u_auxv[i].a_un.a_val;
 777                         break;
 778                 case AT_PHNUM:
 779                         sed.sed_phnum = up->u_auxv[i].a_un.a_val;
 780                         break;
 781                 default:
 782                         break;
 783                 }
 784         }
 785         /* Make sure the emulator has an entry point */
 786         ASSERT(sed.sed_entry != NULL);
 787         ASSERT(sed.sed_phdr != NULL);
 788 
 789         bzero(&env, sizeof (env));
 790         if (args->to_model == DATAMODEL_NATIVE) {
 791                 err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
 792                     &voffset, exec_file, &interp, &env.ex_bssbase,
 793                     &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
 794         }
 795 #if defined(_LP64)
 796         else {
 797                 Elf32_Ehdr ehdr32;
 798                 Elf32_Addr uphdr_vaddr32;
 799                 err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
 800                     &voffset, exec_file, &interp, &env.ex_bssbase,
 801                     &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
 802                 Ehdr32to64(&ehdr32, &ehdr);
 803 
 804                 if (uphdr_vaddr32 == (Elf32_Addr)-1)
 805                         uphdr_vaddr = (Addr)-1;
 806                 else
 807                         uphdr_vaddr = uphdr_vaddr32;
 808         }
 809 #endif  /* _LP64 */
 810         if (err != 0) {
 811                 restoreexecenv(&origenv, &orig_sigaltstack);
 812 
 813                 if (interp != NULL)
 814                         kmem_free(interp, MAXPATHLEN);
 815 
 816                 return (err);
 817         }
 818 
 819         /*
 820          * Save off the important properties of the executable. The
 821          * brand library will ask us for this data later, when it is
 822          * initializing and getting ready to transfer control to the
 823          * brand application.
 824          */
 825         if (uphdr_vaddr == (Addr)-1)
 826                 sedp->sed_phdr = voffset + ehdr.e_phoff;
 827         else
 828                 sedp->sed_phdr = voffset + uphdr_vaddr;
 829         sedp->sed_entry = voffset + ehdr.e_entry;
 830         sedp->sed_phent = ehdr.e_phentsize;
 831         sedp->sed_phnum = ehdr.e_phnum;
 832 
 833         if (interp != NULL) {
 834                 if (ehdr.e_type == ET_DYN) {
 835                         /*
 836                          * This is a shared object executable, so we
 837                          * need to pick a reasonable place to put the
 838                          * heap. Just don't use the first page.
 839                          */
 840                         env.ex_brkbase = (caddr_t)PAGESIZE;
 841                         env.ex_bssbase = (caddr_t)PAGESIZE;
 842                 }
 843 
 844                 /*
 845                  * If the program needs an interpreter (most do), map
 846                  * it in and store relevant information about it in the
 847                  * aux vector, where the brand library can find it.
 848                  */
 849                 if ((err = lookupname(interp, UIO_SYSSPACE,
 850                     FOLLOW, NULLVPP, &nvp)) != 0) {
 851                         uprintf("%s: not found.", interp);
 852                         restoreexecenv(&origenv, &orig_sigaltstack);
 853                         kmem_free(interp, MAXPATHLEN);
 854                         return (err);
 855                 }
 856 
 857                 kmem_free(interp, MAXPATHLEN);
 858 
 859                 if (args->to_model == DATAMODEL_NATIVE) {
 860                         err = mapexec_brand(nvp, args, &ehdr,
 861                             &uphdr_vaddr, &voffset, exec_file, &interp,
 862                             NULL, NULL, NULL, &lddata, NULL);
 863                 }
 864 #if defined(_LP64)
 865                 else {
 866                         Elf32_Ehdr ehdr32;
 867                         Elf32_Addr uphdr_vaddr32;
 868                         err = mapexec32_brand(nvp, args, &ehdr32,
 869                             &uphdr_vaddr32, &voffset, exec_file, &interp,
 870                             NULL, NULL, NULL, &lddata, NULL);
 871                         Ehdr32to64(&ehdr32, &ehdr);
 872 
 873                         if (uphdr_vaddr32 == (Elf32_Addr)-1)
 874                                 uphdr_vaddr = (Addr)-1;
 875                         else
 876                                 uphdr_vaddr = uphdr_vaddr32;
 877                 }
 878 #endif  /* _LP64 */
 879                 VN_RELE(nvp);
 880                 if (err != 0) {
 881                         restoreexecenv(&origenv, &orig_sigaltstack);
 882                         return (err);
 883                 }
 884 
 885                 /*
 886                  * Now that we know the base address of the brand's
 887                  * linker, place it in the aux vector.
 888                  */
 889                 sedp->sed_base = voffset;
 890                 sedp->sed_ldentry = voffset + ehdr.e_entry;
 891                 sedp->sed_lddata = voffset + lddata;
 892         } else {
 893                 /*
 894                  * This program has no interpreter. The brand library
 895                  * will jump to the address in the AT_SUN_BRAND_LDENTRY
 896                  * aux vector, so in this case, put the entry point of
 897                  * the main executable there.
 898                  */
 899                 if (ehdr.e_type == ET_EXEC) {
 900                         /*
 901                          * An executable with no interpreter, this must
 902                          * be a statically linked executable, which
 903                          * means we loaded it at the address specified
 904                          * in the elf header, in which case the e_entry
 905                          * field of the elf header is an absolute
 906                          * address.
 907                          */
 908                         sedp->sed_ldentry = ehdr.e_entry;
 909                         sedp->sed_entry = ehdr.e_entry;
 910                         sedp->sed_lddata = NULL;
 911                         sedp->sed_base = NULL;
 912                 } else {
 913                         /*
 914                          * A shared object with no interpreter, we use
 915                          * the calculated address from above.
 916                          */
 917                         sedp->sed_ldentry = sedp->sed_entry;
 918                         sedp->sed_entry = NULL;
 919                         sedp->sed_phdr = NULL;
 920                         sedp->sed_phent = NULL;
 921                         sedp->sed_phnum = NULL;
 922                         sedp->sed_lddata = NULL;
 923                         sedp->sed_base = voffset;
 924 
 925                         if (ehdr.e_type == ET_DYN) {
 926                                 /*
 927                                  * Delay setting the brkbase until the
 928                                  * first call to brk(); see elfexec()
 929                                  * for details.
 930                                  */
 931                                 env.ex_bssbase = (caddr_t)0;
 932                                 env.ex_brkbase = (caddr_t)0;
 933                                 env.ex_brksize = 0;
 934                         }
 935                 }
 936         }
 937 
 938         env.ex_magic = elfmagic;
 939         env.ex_vp = vp;
 940         setexecenv(&env);
 941 
 942         /*
 943          * It's time to manipulate the process aux vectors.  First
 944          * we need to update the AT_SUN_AUXFLAGS aux vector to set
 945          * the AF_SUN_NOPLM flag.
 946          */
 947         if (args->to_model == DATAMODEL_NATIVE) {
 948                 auxv_t          auxflags_auxv;
 949 
 950                 if (copyin(args->auxp_auxflags, &auxflags_auxv,
 951                     sizeof (auxflags_auxv)) != 0)
 952                         return (EFAULT);
 953 
 954                 ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
 955                 auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
 956                 if (copyout(&auxflags_auxv, args->auxp_auxflags,
 957                     sizeof (auxflags_auxv)) != 0)
 958                         return (EFAULT);
 959         }
 960 #if defined(_LP64)
 961         else {
 962                 auxv32_t        auxflags_auxv32;
 963 
 964                 if (copyin(args->auxp_auxflags, &auxflags_auxv32,
 965                     sizeof (auxflags_auxv32)) != 0)
 966                         return (EFAULT);
 967 
 968                 ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
 969                 auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
 970                 if (copyout(&auxflags_auxv32, args->auxp_auxflags,
 971                     sizeof (auxflags_auxv32)) != 0)
 972                         return (EFAULT);
 973         }
 974 #endif  /* _LP64 */
 975 
 976         /* Second, copy out the brand specific aux vectors. */
 977         if (args->to_model == DATAMODEL_NATIVE) {
 978                 auxv_t brand_auxv[] = {
 979                     { AT_SUN_BRAND_AUX1, 0 },
 980                     { AT_SUN_BRAND_AUX2, 0 },
 981                     { AT_SUN_BRAND_AUX3, 0 }
 982                 };
 983 
 984                 ASSERT(brand_auxv[0].a_type ==
 985                     AT_SUN_BRAND_COMMON_LDDATA);
 986                 brand_auxv[0].a_un.a_val = sed.sed_lddata;
 987 
 988                 if (copyout(&brand_auxv, args->auxp_brand,
 989                     sizeof (brand_auxv)) != 0)
 990                         return (EFAULT);
 991         }
 992 #if defined(_LP64)
 993         else {
 994                 auxv32_t brand_auxv32[] = {
 995                     { AT_SUN_BRAND_AUX1, 0 },
 996                     { AT_SUN_BRAND_AUX2, 0 },
 997                     { AT_SUN_BRAND_AUX3, 0 }
 998                 };
 999 
1000                 ASSERT(brand_auxv32[0].a_type == AT_SUN_BRAND_COMMON_LDDATA);
1001                 brand_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
1002                 if (copyout(&brand_auxv32, args->auxp_brand,
1003                     sizeof (brand_auxv32)) != 0)
1004                         return (EFAULT);
1005         }
1006 #endif  /* _LP64 */
1007 
1008         /*
1009          * Third, the /proc aux vectors set up by elfexec() point to
1010          * brand emulation library and its linker.  Copy these to the
1011          * /proc brand specific aux vector, and update the regular
1012          * /proc aux vectors to point to the executable (and its
1013          * linker).  This will enable debuggers to access the
1014          * executable via the usual /proc or elf notes aux vectors.
1015          *
1016          * The brand emulation library's linker will get its aux
1017          * vectors off the stack, and then update the stack with the
1018          * executable's aux vectors before jumping to the executable's
1019          * linker.
1020          *
1021          * Debugging the brand emulation library must be done from
1022          * the global zone, where the librtld_db module knows how to
1023          * fetch the brand specific aux vectors to access the brand
1024          * emulation library's linker.
1025          */
1026         for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
1027                 ulong_t val;
1028 
1029                 switch (up->u_auxv[i].a_type) {
1030                 case AT_SUN_BRAND_COMMON_LDDATA:
1031                         up->u_auxv[i].a_un.a_val = sed.sed_lddata;
1032                         continue;
1033                 case AT_BASE:
1034                         val = sedp->sed_base;
1035                         break;
1036                 case AT_ENTRY:
1037                         val = sedp->sed_entry;
1038                         break;
1039                 case AT_PHDR:
1040                         val = sedp->sed_phdr;
1041                         break;
1042                 case AT_PHENT:
1043                         val = sedp->sed_phent;
1044                         break;
1045                 case AT_PHNUM:
1046                         val = sedp->sed_phnum;
1047                         break;
1048                 case AT_SUN_LDDATA:
1049                         val = sedp->sed_lddata;
1050                         break;
1051                 default:
1052                         continue;
1053                 }
1054 
1055                 up->u_auxv[i].a_un.a_val = val;
1056                 if (val == NULL) {
1057                         /* Hide the entry for static binaries */
1058                         up->u_auxv[i].a_type = AT_IGNORE;
1059                 }
1060         }
1061 
1062         /*
1063          * The last thing we do here is clear spd->spd_handler.  This
1064          * is important because if we're already a branded process and
1065          * if this exec succeeds, there is a window between when the
1066          * exec() first returns to the userland of the new process and
1067          * when our brand library gets initialized, during which we
1068          * don't want system calls to be re-directed to our brand
1069          * library since it hasn't been initialized yet.
1070          */
1071         spd->spd_handler = NULL;
1072 
1073         return (0);
1074 }
1075 
1076 void
1077 brand_solaris_exec(struct brand *pbrand)
1078 {
1079         brand_proc_data_t       *spd = curproc->p_brand_data;
1080 
1081         ASSERT(curproc->p_brand == pbrand);
1082         ASSERT(curproc->p_brand_data != NULL);
1083         ASSERT(ttolwp(curthread)->lwp_brand != NULL);
1084 
1085         /*
1086          * We should only be called from exec(), when we know the process
1087          * is single-threaded.
1088          */
1089         ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
1090 
1091         /* Upon exec, reset our lwp brand data. */
1092         (void) brand_solaris_freelwp(ttolwp(curthread), pbrand);
1093         (void) brand_solaris_initlwp(ttolwp(curthread), pbrand);
1094 
1095         /*
1096          * Upon exec, reset all the proc brand data, except for the elf
1097          * data associated with the executable we are exec'ing.
1098          */
1099         spd->spd_handler = NULL;
1100 }
1101 
1102 int
1103 brand_solaris_fini(char **emul_table, struct modlinkage *modlinkage,
1104     struct brand *pbrand)
1105 {
1106         int err;
1107 
1108         /*
1109          * If there are any zones using this brand, we can't allow it
1110          * to be unloaded.
1111          */
1112         if (brand_zone_count(pbrand))
1113                 return (EBUSY);
1114 
1115         kmem_free(*emul_table, NSYSCALL);
1116         *emul_table = NULL;
1117 
1118         err = mod_remove(modlinkage);
1119         if (err)
1120                 cmn_err(CE_WARN, "Couldn't unload brand module");
1121 
1122         return (err);
1123 }
1124 
/*
 * LWP fork hook for Solaris-style brands.
 *
 * This routine changes no state.  It only asserts that the processes
 * owning both LWPs carry the brand 'pbrand', that both processes have
 * brand data attached, and that both LWPs have a non-NULL lwp_brand.
 *
 *   p, c     the two LWPs involved in the fork (presumably parent and
 *            child, going by the names -- confirm against the caller)
 *   pbrand   the brand both owning processes are expected to carry
 */
1125 /*ARGSUSED*/
1126 void
1127 brand_solaris_forklwp(klwp_t *p, klwp_t *c, struct brand *pbrand)
1128 {
1129         ASSERT(p->lwp_procp->p_brand == pbrand);
1130         ASSERT(c->lwp_procp->p_brand == pbrand);
1131
1132         ASSERT(p->lwp_procp->p_brand_data != NULL);
1133         ASSERT(c->lwp_procp->p_brand_data != NULL);
1134
1135         /*
1136          * Both LWPs have already been initialized via
1137          * brand_solaris_initlwp().
1138          */
1139         ASSERT(p->lwp_brand != NULL);
1140         ASSERT(c->lwp_brand != NULL);
1141 }
1142 
/*
 * LWP teardown hook for Solaris-style brands: resets the LWP's lwp_brand
 * pointer to NULL.
 *
 * On entry the owning process is asserted to carry the brand 'pbrand' and
 * to have brand data attached, and the LWP's lwp_brand is asserted to be
 * non-NULL.
 *
 *   l        the LWP whose brand marker is cleared
 *   pbrand   the brand the owning process is expected to carry
 */
1143 /*ARGSUSED*/
1144 void
1145 brand_solaris_freelwp(klwp_t *l, struct brand *pbrand)
1146 {
1147         ASSERT(l->lwp_procp->p_brand == pbrand);
1148         ASSERT(l->lwp_procp->p_brand_data != NULL);
1149         ASSERT(l->lwp_brand != NULL);
        /* Only the pointer is reset; this routine frees nothing. */
1150         l->lwp_brand = NULL;
1151 }
1152 
/*
 * LWP setup hook for Solaris-style brands: marks the LWP as initialized
 * by storing a non-NULL placeholder, (void *)-1, in lwp_brand.
 *
 * On entry the owning process is asserted to carry the brand 'pbrand' and
 * to have brand data attached, and the LWP's lwp_brand is asserted to be
 * NULL (i.e. not already initialized).
 *
 *   l        the LWP to mark
 *   pbrand   the brand the owning process is expected to carry
 */
1153 /*ARGSUSED*/
1154 void
1155 brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
1156 {
1157         ASSERT(l->lwp_procp->p_brand == pbrand);
1158         ASSERT(l->lwp_procp->p_brand_data != NULL);
1159         ASSERT(l->lwp_brand == NULL);
        /*
         * The value is a marker, not a pointer to allocated data; the
         * freelwp, forklwp and lwpexit hooks only check it against NULL.
         */
1160         l->lwp_brand = (void *)-1;
1161 }
1162 
/*
 * LWP exit hook for Solaris-style brands.
 *
 * This routine changes no state.  It only asserts that the owning process
 * carries the brand 'pbrand' and has brand data attached, and that the
 * LWP's lwp_brand is non-NULL.
 *
 *   l        the exiting LWP
 *   pbrand   the brand the owning process is expected to carry
 */
1163 /*ARGSUSED*/
1164 void
1165 brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
1166 {
1167         ASSERT(l->lwp_procp->p_brand == pbrand);
1168         ASSERT(l->lwp_procp->p_brand_data != NULL);
1169         ASSERT(l->lwp_brand != NULL);
1170 }
1171 
1172 /*ARGSUSED*/
1173 void
1174 brand_solaris_proc_exit(struct proc *p, struct brand *pbrand)
1175 {
1176         ASSERT(p->p_brand == pbrand);
1177         ASSERT(p->p_brand_data != NULL);
1178 
1179         /* upon exit, free our proc brand data */
1180         kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
1181         p->p_brand_data = NULL;
1182 }
1183 
1184 void
1185 brand_solaris_setbrand(proc_t *p, struct brand *pbrand)
1186 {
1187         ASSERT(p->p_brand == pbrand);
1188         ASSERT(p->p_brand_data == NULL);
1189 
1190         /*
1191          * We should only be called from exec(), when we know the process
1192          * is single-threaded.
1193          */
1194         ASSERT(p->p_tlist == p->p_tlist->t_forw);
1195 
1196         p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP);
1197 }