Print this page
OS-4460 exec brands processes that still have multiple threads
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4188 NULL dereference in lwp_hash_in
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4119 lxbrand panic when running native perl inside lx zone
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4151 setbrand hooks should be sane during fork
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4129 lxbrand should not abuse p_brand_data for storing exit signal
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-3712 lx brand: DTrace pid provider induces core dumps on 64-bit processes
OS-3517 lx brand: branded zones don't interpret .interp section
OS-3149 lx brand always sends SIGCHLD to parent processes, regardless of how clone was invoked
OS-2887 lxbrand add WALL, WCLONE, WNOTHREAD support to waitid
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
OS-2877 lx_librtld_db fails to load due to NULL DT_DEBUG
OS-2834 ship lx brand
        
*** 18,27 ****
--- 18,28 ----
   *
   * CDDL HEADER END
   */
  /*
   * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+  * Copyright (c) 2015, Joyent, Inc. All rights reserved.
   */
  
  #include <sys/kmem.h>
  #include <sys/errno.h>
  #include <sys/systm.h>
*** 43,61 ****
  struct brand_mach_ops native_mach_ops  = {
                  NULL, NULL
  };
  #else /* !__sparcv9 */
  struct brand_mach_ops native_mach_ops  = {
!                 NULL, NULL, NULL, NULL
  };
  #endif /* !__sparcv9 */
  
  brand_t native_brand = {
                  BRAND_VER_1,
                  "native",
                  NULL,
!                 &native_mach_ops
  };
  
  /*
   * Used to maintain a list of all the brands currently loaded into the
   * kernel.
--- 44,63 ----
  struct brand_mach_ops native_mach_ops  = {
                  NULL, NULL
  };
  #else /* !__sparcv9 */
  struct brand_mach_ops native_mach_ops  = {
!                 NULL, NULL, NULL, NULL, NULL, NULL, NULL
  };
  #endif /* !__sparcv9 */
  
  brand_t native_brand = {
                  BRAND_VER_1,
                  "native",
                  NULL,
!                 &native_mach_ops,
!                 0
  };
  
  /*
   * Used to maintain a list of all the brands currently loaded into the
   * kernel.
*** 308,357 ****
                  }
          }
          mutex_exit(&brand_list_lock);
  }
  
! void
! brand_setbrand(proc_t *p)
  {
          brand_t *bp = p->p_zone->zone_brand;
  
!         ASSERT(bp != NULL);
!         ASSERT(p->p_brand == &native_brand);
  
          /*
!          * We should only be called from exec(), when we know the process
!          * is single-threaded.
           */
!         ASSERT(p->p_tlist == p->p_tlist->t_forw);
  
          p->p_brand = bp;
          ASSERT(PROC_IS_BRANDED(p));
          BROP(p)->b_setbrand(p);
  }
  
  void
! brand_clearbrand(proc_t *p, boolean_t no_lwps)
  {
          brand_t *bp = p->p_zone->zone_brand;
!         klwp_t *lwp = NULL;
!         ASSERT(bp != NULL);
!         ASSERT(!no_lwps || (p->p_tlist == NULL));
  
          /*
!          * If called from exec_common() or proc_exit(),
!          * we know the process is single-threaded.
!          * If called from fork_fail, p_tlist is NULL.
           */
!         if (!no_lwps) {
!                 ASSERT(p->p_tlist == p->p_tlist->t_forw);
!                 lwp = p->p_tlist->t_lwp;
          }
  
!         ASSERT(PROC_IS_BRANDED(p));
!         BROP(p)->b_proc_exit(p, lwp);
!         p->p_brand = &native_brand;
  }
  
  #if defined(__sparcv9)
  /*
   * Currently, only sparc has system level brand syscall interposition.
--- 310,425 ----
                  }
          }
          mutex_exit(&brand_list_lock);
  }
  
! int
! brand_setbrand(proc_t *p, boolean_t lwps_ok)
  {
          brand_t *bp = p->p_zone->zone_brand;
+         void *brand_data = NULL;
  
!         VERIFY(MUTEX_NOT_HELD(&p->p_lock));
!         VERIFY(bp != NULL);
  
          /*
!          * Process branding occurs during fork() and exec().  When it happens
!          * during fork(), the LWP count will always be 0 since branding is
!          * performed as part of getproc(), before LWPs have been associated.
!          * The same is not true during exec(), where a multi-LWP process may
!          * undergo branding just prior to gexec(). This is to ensure
!          * exec-related brand hooks are available.  While it may seem
!          * complicated to brand a multi-LWP process, the two possible outcomes
!          * simplify things:
!          *
!          * 1. The exec() succeeds:  LWPs besides the caller will be killed and
!          *    any further branding will occur in a single-LWP context.
!          * 2. The exec() fails: The process will be promptly unbranded since
!          *    the hooks are no longer needed.
!          *
!          * To prevent inconsistent brand state from being encountered during
!          * the exec(), LWPs beyond the caller which are associated with this
!          * process must be held temporarily.  They will be released either when
!          * they are killed in the exec() success, or when the brand is cleared
!          * after exec() failure.
           */
!         if (lwps_ok) {
!                 /*
+                  * We've been called from an exec() context, where tolerating
+                  * the existence of multiple LWPs during branding is necessary.
!                  */
!                 VERIFY(p == curproc);
!                 VERIFY(p->p_tlist != NULL);
  
+                 if (p->p_tlist != p->p_tlist->t_forw) {
+                         /*
+                          * Multiple LWPs are present.  Hold all but the caller.
+                          */
+                         if (!holdlwps(SHOLDFORK1)) {
+                                 return (-1);
+                         }
+                 }
+         } else {
+                 /*
+                  * Processes branded during fork() should not have LWPs at all.
+                  */
+                 VERIFY(p->p_tlist == NULL);
+         }
+ 
+         if (bp->b_data_size > 0) {
+                 brand_data = kmem_zalloc(bp->b_data_size, KM_SLEEP);
+         }
+ 
+         mutex_enter(&p->p_lock);
+         ASSERT(!PROC_IS_BRANDED(p));
          p->p_brand = bp;
+         p->p_brand_data = brand_data;
          ASSERT(PROC_IS_BRANDED(p));
          BROP(p)->b_setbrand(p);
+         mutex_exit(&p->p_lock);
+         return (0);
  }
  
  void
! brand_clearbrand(proc_t *p, boolean_t lwps_ok)
  {
          brand_t *bp = p->p_zone->zone_brand;
!         void *brand_data;
  
+         VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+         VERIFY(bp != NULL);
+         VERIFY(PROC_IS_BRANDED(p));
+ 
+         mutex_enter(&p->p_lock);
+         p->p_brand = &native_brand;
+         brand_data = p->p_brand_data;
+         p->p_brand_data = NULL;
+ 
+         if (lwps_ok) {
+                 VERIFY(p == curproc);
                  /*
!                  * A process with multiple LWPs is being de-branded after
!                  * failing an exec.  The other LWPs were held as part of the
!                  * procedure, so they must be resumed now.
                   */
!                 if (p->p_tlist != NULL && p->p_tlist != p->p_tlist->t_forw) {
!                         continuelwps(p);
                  }
+         } else {
+                 /*
+                  * While clearing the brand, it's ok for one LWP to be present.
+                  * This happens when a native binary is executed inside a
+                  * branded zone, since the brand will be removed during the
+                  * course of a successful exec.
+                  */
+                 VERIFY(p->p_tlist == NULL || p->p_tlist == p->p_tlist->t_forw);
+         }
+         mutex_exit(&p->p_lock);
  
!         if (brand_data != NULL) {
!                 kmem_free(brand_data, bp->b_data_size);
!         }
  }
  
  #if defined(__sparcv9)
  /*
   * Currently, only sparc has system level brand syscall interposition.
*** 481,491 ****
           */
          if (cmd == B_EXEC_BRAND)
                  return (ENOSYS);
  
          /* For all other operations this must be a branded process. */
!         if (p->p_brand == &native_brand)
                  return (ENOSYS);
  
          ASSERT(p->p_brand == pbrand);
          ASSERT(p->p_brand_data != NULL);
  
--- 549,559 ----
           */
          if (cmd == B_EXEC_BRAND)
                  return (ENOSYS);
  
          /* For all other operations this must be a branded process. */
!         if (!PROC_IS_BRANDED(p))
                  return (ENOSYS);
  
          ASSERT(p->p_brand == pbrand);
          ASSERT(p->p_brand_data != NULL);
  
*** 599,627 ****
  
  /*ARGSUSED*/
  int
  brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
      intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
!     cred_t *cred, int brand_action, struct brand *pbrand, char *bname,
!     char *brandlib, char *brandlib32, char *brandlinker, char *brandlinker32)
  {
  
          vnode_t         *nvp;
          Ehdr            ehdr;
          Addr            uphdr_vaddr;
          intptr_t        voffset;
!         int             interp;
          int             i, err;
          struct execenv  env;
          struct execenv  origenv;
          stack_t         orig_sigaltstack;
          struct user     *up = PTOU(curproc);
          proc_t          *p = ttoproc(curthread);
          klwp_t          *lwp = ttolwp(curthread);
          brand_proc_data_t       *spd;
          brand_elf_data_t sed, *sedp;
-         char            *linker;
          uintptr_t       lddata; /* lddata of executable's linker */
  
          ASSERT(curproc->p_brand == pbrand);
          ASSERT(curproc->p_brand_data != NULL);
  
--- 667,694 ----
  
  /*ARGSUSED*/
  int
  brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
      intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
!     cred_t *cred, int *brand_action, struct brand *pbrand, char *bname,
!     char *brandlib, char *brandlib32)
  {
  
          vnode_t         *nvp;
          Ehdr            ehdr;
          Addr            uphdr_vaddr;
          intptr_t        voffset;
!         char            *interp;
          int             i, err;
          struct execenv  env;
          struct execenv  origenv;
          stack_t         orig_sigaltstack;
          struct user     *up = PTOU(curproc);
          proc_t          *p = ttoproc(curthread);
          klwp_t          *lwp = ttolwp(curthread);
          brand_proc_data_t       *spd;
          brand_elf_data_t sed, *sedp;
          uintptr_t       lddata; /* lddata of executable's linker */
  
          ASSERT(curproc->p_brand == pbrand);
          ASSERT(curproc->p_brand_data != NULL);
  
*** 634,649 ****
           * We will exec the brand library and then map in the target
           * application and (optionally) the brand's default linker.
           */
          if (args->to_model == DATAMODEL_NATIVE) {
                  args->emulator = brandlib;
-                 linker = brandlinker;
          }
  #if defined(_LP64)
          else {
                  args->emulator = brandlib32;
-                 linker = brandlinker32;
          }
  #endif  /* _LP64 */
  
          if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW,
              NULLVPP, &nvp)) != 0) {
--- 701,714 ----
*** 723,741 ****
  
          bzero(&env, sizeof (env));
          if (args->to_model == DATAMODEL_NATIVE) {
                  err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
                      &voffset, exec_file, &interp, &env.ex_bssbase,
!                     &env.ex_brkbase, &env.ex_brksize, NULL);
          }
  #if defined(_LP64)
          else {
                  Elf32_Ehdr ehdr32;
                  Elf32_Addr uphdr_vaddr32;
                  err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
                      &voffset, exec_file, &interp, &env.ex_bssbase,
!                     &env.ex_brkbase, &env.ex_brksize, NULL);
                  Ehdr32to64(&ehdr32, &ehdr);
  
                  if (uphdr_vaddr32 == (Elf32_Addr)-1)
                          uphdr_vaddr = (Addr)-1;
                  else
--- 788,806 ----
  
          bzero(&env, sizeof (env));
          if (args->to_model == DATAMODEL_NATIVE) {
                  err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
                      &voffset, exec_file, &interp, &env.ex_bssbase,
!                     &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
          }
  #if defined(_LP64)
          else {
                  Elf32_Ehdr ehdr32;
                  Elf32_Addr uphdr_vaddr32;
                  err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
                      &voffset, exec_file, &interp, &env.ex_bssbase,
!                     &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
                  Ehdr32to64(&ehdr32, &ehdr);
  
                  if (uphdr_vaddr32 == (Elf32_Addr)-1)
                          uphdr_vaddr = (Addr)-1;
                  else
*** 742,751 ****
--- 807,820 ----
                          uphdr_vaddr = uphdr_vaddr32;
          }
  #endif  /* _LP64 */
          if (err != 0) {
                  restoreexecenv(&origenv, &orig_sigaltstack);
+ 
+                 if (interp != NULL)
+                         kmem_free(interp, MAXPATHLEN);
+ 
                  return (err);
          }
  
          /*
           * Save off the important properties of the executable. The
*** 759,769 ****
                  sedp->sed_phdr = voffset + uphdr_vaddr;
          sedp->sed_entry = voffset + ehdr.e_entry;
          sedp->sed_phent = ehdr.e_phentsize;
          sedp->sed_phnum = ehdr.e_phnum;
  
!         if (interp) {
                  if (ehdr.e_type == ET_DYN) {
                          /*
                           * This is a shared object executable, so we
                           * need to pick a reasonable place to put the
                           * heap. Just don't use the first page.
--- 828,838 ----
                  sedp->sed_phdr = voffset + uphdr_vaddr;
          sedp->sed_entry = voffset + ehdr.e_entry;
          sedp->sed_phent = ehdr.e_phentsize;
          sedp->sed_phnum = ehdr.e_phnum;
  
!         if (interp != NULL) {
                  if (ehdr.e_type == ET_DYN) {
                          /*
                           * This is a shared object executable, so we
                           * need to pick a reasonable place to put the
                           * heap. Just don't use the first page.
*** 775,802 ****
                  /*
                   * If the program needs an interpreter (most do), map
                   * it in and store relevant information about it in the
                   * aux vector, where the brand library can find it.
                   */
!                 if ((err = lookupname(linker, UIO_SYSSPACE,
                      FOLLOW, NULLVPP, &nvp)) != 0) {
!                         uprintf("%s: not found.", brandlinker);
                          restoreexecenv(&origenv, &orig_sigaltstack);
                          return (err);
                  }
                  if (args->to_model == DATAMODEL_NATIVE) {
                          err = mapexec_brand(nvp, args, &ehdr,
                              &uphdr_vaddr, &voffset, exec_file, &interp,
!                             NULL, NULL, NULL, &lddata);
                  }
  #if defined(_LP64)
                  else {
                          Elf32_Ehdr ehdr32;
                          Elf32_Addr uphdr_vaddr32;
                          err = mapexec32_brand(nvp, args, &ehdr32,
                              &uphdr_vaddr32, &voffset, exec_file, &interp,
!                             NULL, NULL, NULL, &lddata);
                          Ehdr32to64(&ehdr32, &ehdr);
  
                          if (uphdr_vaddr32 == (Elf32_Addr)-1)
                                  uphdr_vaddr = (Addr)-1;
                          else
--- 844,875 ----
                  /*
                   * If the program needs an interpreter (most do), map
                   * it in and store relevant information about it in the
                   * aux vector, where the brand library can find it.
                   */
!                 if ((err = lookupname(interp, UIO_SYSSPACE,
                      FOLLOW, NULLVPP, &nvp)) != 0) {
!                         uprintf("%s: not found.", interp);
                          restoreexecenv(&origenv, &orig_sigaltstack);
+                         kmem_free(interp, MAXPATHLEN);
                          return (err);
                  }
+ 
+                 kmem_free(interp, MAXPATHLEN);
+ 
                  if (args->to_model == DATAMODEL_NATIVE) {
                          err = mapexec_brand(nvp, args, &ehdr,
                              &uphdr_vaddr, &voffset, exec_file, &interp,
!                             NULL, NULL, NULL, &lddata, NULL);
                  }
  #if defined(_LP64)
                  else {
                          Elf32_Ehdr ehdr32;
                          Elf32_Addr uphdr_vaddr32;
                          err = mapexec32_brand(nvp, args, &ehdr32,
                              &uphdr_vaddr32, &voffset, exec_file, &interp,
!                             NULL, NULL, NULL, &lddata, NULL);
                          Ehdr32to64(&ehdr32, &ehdr);
  
                          if (uphdr_vaddr32 == (Elf32_Addr)-1)
                                  uphdr_vaddr = (Addr)-1;
                          else
*** 932,944 ****
          }
  #endif  /* _LP64 */
  
          /*
           * Third, the /proc aux vectors set up by elfexec() point to
!          * brand emulation library and it's linker.  Copy these to the
           * /proc brand specific aux vector, and update the regular
!          * /proc aux vectors to point to the executable (and it's
           * linker).  This will enable debuggers to access the
           * executable via the usual /proc or elf notes aux vectors.
           *
           * The brand emulation library's linker will get its aux
           * vectors off the stack, and then update the stack with the
--- 1005,1017 ----
          }
  #endif  /* _LP64 */
  
          /*
           * Third, the /proc aux vectors set up by elfexec() point to
!          * brand emulation library and its linker.  Copy these to the
           * /proc brand specific aux vector, and update the regular
!          * /proc aux vectors to point to the executable (and its
           * linker).  This will enable debuggers to access the
           * executable via the usual /proc or elf notes aux vectors.
           *
           * The brand emulation library's linker will get its aux
           * vectors off the stack, and then update the stack with the
*** 1076,1134 ****
          ASSERT(l->lwp_brand != NULL);
          l->lwp_brand = NULL;
  }
  
  /*ARGSUSED*/
! int
  brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
  {
          ASSERT(l->lwp_procp->p_brand == pbrand);
          ASSERT(l->lwp_procp->p_brand_data != NULL);
          ASSERT(l->lwp_brand == NULL);
          l->lwp_brand = (void *)-1;
-         return (0);
  }
  
  /*ARGSUSED*/
  void
  brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
  {
-         proc_t  *p = l->lwp_procp;
- 
          ASSERT(l->lwp_procp->p_brand == pbrand);
          ASSERT(l->lwp_procp->p_brand_data != NULL);
          ASSERT(l->lwp_brand != NULL);
- 
-         /*
-          * We should never be called for the last thread in a process.
-          * (That case is handled by brand_solaris_proc_exit().)
-          * Therefore this lwp must be exiting from a multi-threaded
-          * process.
-          */
-         ASSERT(p->p_tlist != p->p_tlist->t_forw);
- 
-         l->lwp_brand = NULL;
  }
  
  /*ARGSUSED*/
  void
! brand_solaris_proc_exit(struct proc *p, klwp_t *l, struct brand *pbrand)
  {
          ASSERT(p->p_brand == pbrand);
          ASSERT(p->p_brand_data != NULL);
  
-         /*
-          * When called from proc_exit(), we know that process is
-          * single-threaded and free our lwp brand data.
-          * otherwise just free p_brand_data and return.
-          */
-         if (l != NULL) {
-                 ASSERT(p->p_tlist == p->p_tlist->t_forw);
-                 ASSERT(p->p_tlist->t_lwp == l);
-                 (void) brand_solaris_freelwp(l, pbrand);
-         }
- 
          /* upon exit, free our proc brand data */
          kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
          p->p_brand_data = NULL;
  }
  
--- 1149,1183 ----
          ASSERT(l->lwp_brand != NULL);
          l->lwp_brand = NULL;
  }
  
  /*ARGSUSED*/
! void
  brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
  {
          ASSERT(l->lwp_procp->p_brand == pbrand);
          ASSERT(l->lwp_procp->p_brand_data != NULL);
          ASSERT(l->lwp_brand == NULL);
          l->lwp_brand = (void *)-1;
  }
  
  /*ARGSUSED*/
  void
  brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
  {
          ASSERT(l->lwp_procp->p_brand == pbrand);
          ASSERT(l->lwp_procp->p_brand_data != NULL);
          ASSERT(l->lwp_brand != NULL);
  }
  
  /*ARGSUSED*/
  void
! brand_solaris_proc_exit(struct proc *p, struct brand *pbrand)
  {
          ASSERT(p->p_brand == pbrand);
          ASSERT(p->p_brand_data != NULL);
  
          /* upon exit, free our proc brand data */
          kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
          p->p_brand_data = NULL;
  }
  
*** 1143,1149 ****
           * is single-threaded.
           */
          ASSERT(p->p_tlist == p->p_tlist->t_forw);
  
          p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP);
-         (void) brand_solaris_initlwp(p->p_tlist->t_lwp, pbrand);
  }
--- 1192,1197 ----