Print this page
OS-4460 exec brands processes that still have multiple threads
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4188 NULL dereference in lwp_hash_in
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4119 lxbrand panic when running native perl inside lx zone
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4151 setbrand hooks should be sane during fork
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4129 lxbrand should not abuse p_brand_data for storing exit signal
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-3712 lx brand: DTrace pid provider induces core dumps on 64-bit processes
OS-3517 lx brand: branded zones don't interpret .interp section
OS-3149 lx brand always sends SIGCHLD to parent processes, regardless of how clone was invoked
OS-2887 lxbrand add WALL, WCLONE, WNOTHREAD support to waitid
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
OS-2877 lx_librtld_db fails to load due to NULL DT_DEBUG
OS-2834 ship lx brand

@@ -18,10 +18,11 @@
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
  */
 
 #include <sys/kmem.h>
 #include <sys/errno.h>
 #include <sys/systm.h>

@@ -43,19 +44,20 @@
 struct brand_mach_ops native_mach_ops  = {
                 NULL, NULL
 };
 #else /* !__sparcv9 */
 struct brand_mach_ops native_mach_ops  = {
-                NULL, NULL, NULL, NULL
+                NULL, NULL, NULL, NULL, NULL, NULL, NULL
 };
 #endif /* !__sparcv9 */
 
 brand_t native_brand = {
                 BRAND_VER_1,
                 "native",
                 NULL,
-                &native_mach_ops
+                &native_mach_ops,
+                0
 };
 
 /*
  * Used to maintain a list of all the brands currently loaded into the
  * kernel.

@@ -308,50 +310,116 @@
                 }
         }
         mutex_exit(&brand_list_lock);
 }
 
-void
-brand_setbrand(proc_t *p)
+int
+brand_setbrand(proc_t *p, boolean_t lwps_ok)
 {
         brand_t *bp = p->p_zone->zone_brand;
+        void *brand_data = NULL;
 
-        ASSERT(bp != NULL);
-        ASSERT(p->p_brand == &native_brand);
+        VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+        VERIFY(bp != NULL);
 
         /*
-         * We should only be called from exec(), when we know the process
-         * is single-threaded.
+         * Process branding occurs during fork() and exec().  When it happens
+         * during fork(), the LWP count will always be 0 since branding is
+         * performed as part of getproc(), before LWPs have been associated.
+         * The same is not true during exec(), where a multi-LWP process may
+         * undergo branding just prior to gexec(). This is to ensure
+         * exec-related brand hooks are available.  While it may seem
+         * complicated to brand a multi-LWP process, the two possible outcomes
+         * simplify things:
+         *
+         * 1. The exec() succeeds:  LWPs besides the caller will be killed and
+         *    any further branding will occur in a single-LWP context.
+         * 2. The exec() fails: The process will be promptly unbranded since
+         *    the hooks are no longer needed.
+         *
+         * To prevent inconsistent brand state from being encountered during
+         * the exec(), LWPs beyond the caller which are associated with this
+         * process must be held temporarily.  They will be released either when
+         * they are killed upon exec() success, or when the brand is cleared
+         * after exec() failure.
          */
-        ASSERT(p->p_tlist == p->p_tlist->t_forw);
+        if (lwps_ok) {
+                /*
+                 * We've been called from an exec() context, where tolerating
+                 * the existence of multiple LWPs during branding is necessary.
+                 */
+                VERIFY(p == curproc);
+                VERIFY(p->p_tlist != NULL);
 
+                if (p->p_tlist != p->p_tlist->t_forw) {
+                        /*
+                         * Multiple LWPs are present.  Hold all but the caller.
+                         */
+                        if (!holdlwps(SHOLDFORK1)) {
+                                return (-1);
+                        }
+                }
+        } else {
+                /*
+                 * Processes branded during fork() should not have LWPs at all.
+                 */
+                VERIFY(p->p_tlist == NULL);
+        }
+
+        if (bp->b_data_size > 0) {
+                brand_data = kmem_zalloc(bp->b_data_size, KM_SLEEP);
+        }
+
+        mutex_enter(&p->p_lock);
+        ASSERT(!PROC_IS_BRANDED(p));
         p->p_brand = bp;
+        p->p_brand_data = brand_data;
         ASSERT(PROC_IS_BRANDED(p));
         BROP(p)->b_setbrand(p);
+        mutex_exit(&p->p_lock);
+        return (0);
 }
 
 void
-brand_clearbrand(proc_t *p, boolean_t no_lwps)
+brand_clearbrand(proc_t *p, boolean_t lwps_ok)
 {
         brand_t *bp = p->p_zone->zone_brand;
-        klwp_t *lwp = NULL;
-        ASSERT(bp != NULL);
-        ASSERT(!no_lwps || (p->p_tlist == NULL));
+        void *brand_data;
 
+        VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+        VERIFY(bp != NULL);
+        VERIFY(PROC_IS_BRANDED(p));
+
+        mutex_enter(&p->p_lock);
+        p->p_brand = &native_brand;
+        brand_data = p->p_brand_data;
+        p->p_brand_data = NULL;
+
+        if (lwps_ok) {
+                VERIFY(p == curproc);
         /*
-         * If called from exec_common() or proc_exit(),
-         * we know the process is single-threaded.
-         * If called from fork_fail, p_tlist is NULL.
+                 * A process with multiple LWPs is being de-branded after
+                 * failing an exec.  The other LWPs were held as part of the
+                 * procedure, so they must be resumed now.
          */
-        if (!no_lwps) {
-                ASSERT(p->p_tlist == p->p_tlist->t_forw);
-                lwp = p->p_tlist->t_lwp;
+                if (p->p_tlist != NULL && p->p_tlist != p->p_tlist->t_forw) {
+                        continuelwps(p);
         }
+        } else {
+                /*
+                 * While clearing the brand, it's ok for one LWP to be present.
+                 * This happens when a native binary is executed inside a
+                 * branded zone, since the brand will be removed during the
+                 * course of a successful exec.
+                 */
+                VERIFY(p->p_tlist == NULL || p->p_tlist == p->p_tlist->t_forw);
+        }
+        mutex_exit(&p->p_lock);
 
-        ASSERT(PROC_IS_BRANDED(p));
-        BROP(p)->b_proc_exit(p, lwp);
-        p->p_brand = &native_brand;
+        if (brand_data != NULL) {
+                kmem_free(brand_data, bp->b_data_size);
+        }
 }
 
 #if defined(__sparcv9)
 /*
  * Currently, only sparc has system level brand syscall interposition.

@@ -481,11 +549,11 @@
          */
         if (cmd == B_EXEC_BRAND)
                 return (ENOSYS);
 
         /* For all other operations this must be a branded process. */
-        if (p->p_brand == &native_brand)
+        if (!PROC_IS_BRANDED(p))
                 return (ENOSYS);
 
         ASSERT(p->p_brand == pbrand);
         ASSERT(p->p_brand_data != NULL);
 

@@ -599,29 +667,28 @@
 
 /*ARGSUSED*/
 int
 brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
     intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
-    cred_t *cred, int brand_action, struct brand *pbrand, char *bname,
-    char *brandlib, char *brandlib32, char *brandlinker, char *brandlinker32)
+    cred_t *cred, int *brand_action, struct brand *pbrand, char *bname,
+    char *brandlib, char *brandlib32)
 {
 
         vnode_t         *nvp;
         Ehdr            ehdr;
         Addr            uphdr_vaddr;
         intptr_t        voffset;
-        int             interp;
+        char            *interp;
         int             i, err;
         struct execenv  env;
         struct execenv  origenv;
         stack_t         orig_sigaltstack;
         struct user     *up = PTOU(curproc);
         proc_t          *p = ttoproc(curthread);
         klwp_t          *lwp = ttolwp(curthread);
         brand_proc_data_t       *spd;
         brand_elf_data_t sed, *sedp;
-        char            *linker;
         uintptr_t       lddata; /* lddata of executable's linker */
 
         ASSERT(curproc->p_brand == pbrand);
         ASSERT(curproc->p_brand_data != NULL);
 

@@ -634,16 +701,14 @@
          * We will exec the brand library and then map in the target
          * application and (optionally) the brand's default linker.
          */
         if (args->to_model == DATAMODEL_NATIVE) {
                 args->emulator = brandlib;
-                linker = brandlinker;
         }
 #if defined(_LP64)
         else {
                 args->emulator = brandlib32;
-                linker = brandlinker32;
         }
 #endif  /* _LP64 */
 
         if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW,
             NULLVPP, &nvp)) != 0) {

@@ -723,19 +788,19 @@
 
         bzero(&env, sizeof (env));
         if (args->to_model == DATAMODEL_NATIVE) {
                 err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
                     &voffset, exec_file, &interp, &env.ex_bssbase,
-                    &env.ex_brkbase, &env.ex_brksize, NULL);
+                    &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
         }
 #if defined(_LP64)
         else {
                 Elf32_Ehdr ehdr32;
                 Elf32_Addr uphdr_vaddr32;
                 err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
                     &voffset, exec_file, &interp, &env.ex_bssbase,
-                    &env.ex_brkbase, &env.ex_brksize, NULL);
+                    &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
                 Ehdr32to64(&ehdr32, &ehdr);
 
                 if (uphdr_vaddr32 == (Elf32_Addr)-1)
                         uphdr_vaddr = (Addr)-1;
                 else

@@ -742,10 +807,14 @@
                         uphdr_vaddr = uphdr_vaddr32;
         }
 #endif  /* _LP64 */
         if (err != 0) {
                 restoreexecenv(&origenv, &orig_sigaltstack);
+
+                if (interp != NULL)
+                        kmem_free(interp, MAXPATHLEN);
+
                 return (err);
         }
 
         /*
          * Save off the important properties of the executable. The

@@ -759,11 +828,11 @@
                 sedp->sed_phdr = voffset + uphdr_vaddr;
         sedp->sed_entry = voffset + ehdr.e_entry;
         sedp->sed_phent = ehdr.e_phentsize;
         sedp->sed_phnum = ehdr.e_phnum;
 
-        if (interp) {
+        if (interp != NULL) {
                 if (ehdr.e_type == ET_DYN) {
                         /*
                          * This is a shared object executable, so we
                          * need to pick a reasonable place to put the
                          * heap. Just don't use the first page.

@@ -775,28 +844,32 @@
                 /*
                  * If the program needs an interpreter (most do), map
                  * it in and store relevant information about it in the
                  * aux vector, where the brand library can find it.
                  */
-                if ((err = lookupname(linker, UIO_SYSSPACE,
+                if ((err = lookupname(interp, UIO_SYSSPACE,
                     FOLLOW, NULLVPP, &nvp)) != 0) {
-                        uprintf("%s: not found.", brandlinker);
+                        uprintf("%s: not found.", interp);
                         restoreexecenv(&origenv, &orig_sigaltstack);
+                        kmem_free(interp, MAXPATHLEN);
                         return (err);
                 }
+
+                kmem_free(interp, MAXPATHLEN);
+
                 if (args->to_model == DATAMODEL_NATIVE) {
                         err = mapexec_brand(nvp, args, &ehdr,
                             &uphdr_vaddr, &voffset, exec_file, &interp,
-                            NULL, NULL, NULL, &lddata);
+                            NULL, NULL, NULL, &lddata, NULL);
                 }
 #if defined(_LP64)
                 else {
                         Elf32_Ehdr ehdr32;
                         Elf32_Addr uphdr_vaddr32;
                         err = mapexec32_brand(nvp, args, &ehdr32,
                             &uphdr_vaddr32, &voffset, exec_file, &interp,
-                            NULL, NULL, NULL, &lddata);
+                            NULL, NULL, NULL, &lddata, NULL);
                         Ehdr32to64(&ehdr32, &ehdr);
 
                         if (uphdr_vaddr32 == (Elf32_Addr)-1)
                                 uphdr_vaddr = (Addr)-1;
                         else

@@ -932,13 +1005,13 @@
         }
 #endif  /* _LP64 */
 
         /*
          * Third, the /proc aux vectors set up by elfexec() point to
-         * brand emulation library and it's linker.  Copy these to the
+         * brand emulation library and its linker.  Copy these to the
          * /proc brand specific aux vector, and update the regular
-         * /proc aux vectors to point to the executable (and it's
+         * /proc aux vectors to point to the executable (and its
          * linker).  This will enable debuggers to access the
          * executable via the usual /proc or elf notes aux vectors.
          *
-         * The brand emulation library's linker will get it's aux
+         * The brand emulation library's linker will get its aux
          * vectors off the stack, and then update the stack with the

@@ -1076,59 +1149,35 @@
         ASSERT(l->lwp_brand != NULL);
         l->lwp_brand = NULL;
 }
 
 /*ARGSUSED*/
-int
+void
 brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
 {
         ASSERT(l->lwp_procp->p_brand == pbrand);
         ASSERT(l->lwp_procp->p_brand_data != NULL);
         ASSERT(l->lwp_brand == NULL);
         l->lwp_brand = (void *)-1;
-        return (0);
 }
 
 /*ARGSUSED*/
 void
 brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
 {
-        proc_t  *p = l->lwp_procp;
-
         ASSERT(l->lwp_procp->p_brand == pbrand);
         ASSERT(l->lwp_procp->p_brand_data != NULL);
         ASSERT(l->lwp_brand != NULL);
-
-        /*
-         * We should never be called for the last thread in a process.
-         * (That case is handled by brand_solaris_proc_exit().)
-         * Therefore this lwp must be exiting from a multi-threaded
-         * process.
-         */
-        ASSERT(p->p_tlist != p->p_tlist->t_forw);
-
-        l->lwp_brand = NULL;
 }
 
 /*ARGSUSED*/
 void
-brand_solaris_proc_exit(struct proc *p, klwp_t *l, struct brand *pbrand)
+brand_solaris_proc_exit(struct proc *p, struct brand *pbrand)
 {
         ASSERT(p->p_brand == pbrand);
         ASSERT(p->p_brand_data != NULL);
 
-        /*
-         * When called from proc_exit(), we know that process is
-         * single-threaded and free our lwp brand data.
-         * otherwise just free p_brand_data and return.
-         */
-        if (l != NULL) {
-                ASSERT(p->p_tlist == p->p_tlist->t_forw);
-                ASSERT(p->p_tlist->t_lwp == l);
-                (void) brand_solaris_freelwp(l, pbrand);
-        }
-
         /* upon exit, free our proc brand data */
         kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
         p->p_brand_data = NULL;
 }
 

@@ -1143,7 +1192,6 @@
          * is single-threaded.
          */
         ASSERT(p->p_tlist == p->p_tlist->t_forw);
 
         p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP);
-        (void) brand_solaris_initlwp(p->p_tlist->t_lwp, pbrand);
 }