Print this page
Reduce lint
OS-4818 contract template disappears on exec
OS-4460 exec brands processes that still have multiple threads
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-3742 lxbrand add support for signalfd
OS-4382 remove obsolete brand hooks added during lx development
OS-4188 NULL dereference in lwp_hash_in
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4119 lxbrand panic when running native perl inside lx zone
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4151 setbrand hooks should be sane during fork
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4129 lxbrand should not abuse p_brand_data for storing exit signal
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-3561 lxbrand emulation library should execute on alternate stack
OS-3558 lxbrand add support for full in-kernel syscall handling
OS-3545 lx_syscall_regs should not walk stack
OS-3868 many LTP testcases now hang
OS-3901 lxbrand lx_recvmsg fails to translate control messages when 64-bit
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-3871 AT_RANDOM aux entry should be populated using random_get_pseudo_bytes
OS-3611 lx brand: 64-bit processes should not use VAs above VA hole
OS-3438 lx brand: "start rsyslog" hangs
OS-3280 need a way to specify the root of a native system in the lx brand
OS-3279 lx brand should allow delegated datasets
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-2949 add support for AT_RANDOM aux vector entry

@@ -24,11 +24,11 @@
  */
 
 /*      Copyright (c) 1988 AT&T */
 /*        All Rights Reserved   */
 /*
- * Copyright 2014, Joyent, Inc.  All rights reserved.
+ * Copyright 2015, Joyent, Inc.  All rights reserved.
  */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/sysmacros.h>

@@ -67,10 +67,11 @@
 #include <sys/lwpchan_impl.h>
 #include <sys/pool.h>
 #include <sys/sdt.h>
 #include <sys/brand.h>
 #include <sys/klpd.h>
+#include <sys/random.h>
 
 #include <c2/audit.h>
 
 #include <vm/hat.h>
 #include <vm/anon.h>

@@ -95,10 +96,11 @@
 uint_t auxv_hwcap32 = 0;        /* 32-bit version of auxv_hwcap */
 uint_t auxv_hwcap32_2 = 0;      /* 32-bit version of auxv_hwcap2 */
 #endif
 
 #define PSUIDFLAGS              (SNOCD|SUGID)
+#define RANDOM_LEN      16      /* 16 bytes for AT_RANDOM aux entry */
 
 /*
  * exece() - system call wrapper around exec_common()
  */
 int

@@ -295,18 +297,47 @@
 
         ua.fname = fname;
         ua.argp = argp;
         ua.envp = envp;
 
-        /* If necessary, brand this process before we start the exec. */
-        if (brandme)
-                brand_setbrand(p);
+        /* If necessary, brand this process/lwp before we start the exec. */
+        if (brandme) {
+                void *brand_data = NULL;
 
+                /*
+                 * Process branding may fail if multiple LWPs are present and
+                 * holdlwps() cannot complete successfully.
+                 */
+                error = brand_setbrand(p, B_TRUE);
+
+                if (error == 0 && BROP(p)->b_lwpdata_alloc != NULL) {
+                        brand_data = BROP(p)->b_lwpdata_alloc(p);
+                        if (brand_data == NULL) {
+                                error = 1;
+                        }
+                }
+
+                if (error == 0) {
+                        mutex_enter(&p->p_lock);
+                        BROP(p)->b_initlwp(lwp, brand_data);
+                        mutex_exit(&p->p_lock);
+                } else {
+                        VN_RELE(vp);
+                        if (dir != NULL) {
+                                VN_RELE(dir);
+                        }
+                        pn_free(&resolvepn);
+                        goto fail;
+                }
+        }
+
         if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz,
-            exec_file, p->p_cred, brand_action)) != 0) {
-                if (brandme)
-                        brand_clearbrand(p, B_FALSE);
+            exec_file, p->p_cred, &brand_action)) != 0) {
+                if (brandme) {
+                        BROP(p)->b_freelwp(lwp);
+                        brand_clearbrand(p, B_TRUE);
+                }
                 VN_RELE(vp);
                 if (dir != NULL)
                         VN_RELE(dir);
                 pn_free(&resolvepn);
                 goto fail;

@@ -334,11 +365,11 @@
         curthread->t_predcache = NULL;
 
         /*
          * Clear contract template state
          */
-        lwp_ctmpl_clear(lwp);
+        lwp_ctmpl_clear(lwp, B_TRUE);
 
         /*
          * Save the directory in which we found the executable for expanding
          * the %d token used in core file patterns.
          */

@@ -358,10 +389,12 @@
          * restart system calls; the new program's environment should
          * not be affected by detritus from the old program.  Any
          * pending held signals remain held, so don't clear t_hold.
          */
         mutex_enter(&p->p_lock);
+        DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp,
+            uintptr_t, lwp->lwp_oldcontext, uintptr_t, 0);
         lwp->lwp_oldcontext = 0;
         lwp->lwp_ustack = 0;
         lwp->lwp_old_stk_ctl = 0;
         sigemptyset(&up->u_signodefer);
         sigemptyset(&up->u_sigonstack);

@@ -417,12 +450,14 @@
          */
         close_exec(P_FINFO(p));
         TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up);
 
         /* Unbrand ourself if necessary. */
-        if (PROC_IS_BRANDED(p) && (brand_action == EBA_NATIVE))
+        if (PROC_IS_BRANDED(p) && (brand_action == EBA_NATIVE)) {
+                BROP(p)->b_freelwp(lwp);
                 brand_clearbrand(p, B_FALSE);
+        }
 
         setregs(&args);
 
         /* Mark this as an executable vnode */
         mutex_enter(&vp->v_lock);

@@ -542,11 +577,11 @@
         struct intpdata *idatap,
         int level,
         long *execsz,
         caddr_t exec_file,
         struct cred *cred,
-        int brand_action)
+        int *brand_action)
 {
         struct vnode *vp, *execvp = NULL;
         proc_t *pp = ttoproc(curthread);
         struct execsw *eswp;
         int error = 0;

@@ -856,12 +891,18 @@
                          * invalidate the associated /proc vnode.
                          */
                         if (pp->p_plist || (pp->p_proc_flag & P_PR_TRACE))
                                 args->traceinval = 1;
                 }
-                if (pp->p_proc_flag & P_PR_PTRACE)
+
+                /*
+                 * If legacy ptrace is enabled, generate the SIGTRAP.
+                 */
+                if (pp->p_proc_flag & P_PR_PTRACE) {
                         psignal(pp, SIGTRAP);
+                }
+
                 if (args->traceinval)
                         prinvalidate(&pp->p_user);
         }
         if (execvp)
                 VN_RELE(execvp);

@@ -1515,11 +1556,30 @@
         args->stk_strp += len;
 
         return (0);
 }
 
+/*
+ * Copy len bytes from the kernel buffer sp onto the stack; E2BIG if full.
+ */
 static int
+stk_byte_add(uarg_t *args, const uint8_t *sp, size_t len)
+{
+        if (STK_AVAIL(args) < sizeof (int))     /* need an offset slot */
+                return (E2BIG);
+        *--args->stk_offp = args->stk_strp - args->stk_base; /* data offset */
+
+        if (len > STK_AVAIL(args))              /* payload must fit too */
+                return (E2BIG);
+        bcopy(sp, args->stk_strp, len);         /* raw bytes, no NUL added */
+
+        args->stk_strp += len;                  /* advance past the payload */
+
+        return (0);
+}
+
+static int
 stk_getptr(uarg_t *args, char *src, char **dst)
 {
         int error;
 
         if (args->from_model == DATAMODEL_NATIVE) {

@@ -1551,10 +1611,11 @@
         int argv_empty = 0;
         size_t ptrsize = args->from_ptrsize;
         size_t size, pad;
         char *argv = (char *)uap->argp;
         char *envp = (char *)uap->envp;
+        uint8_t rdata[RANDOM_LEN];
 
         /*
          * Copy interpreter's name and argument to argv[0] and argv[1].
          * In the rare case that we have nested interpreters then those names
          * and arguments are also copied to the subsequent slots in argv.

@@ -1633,12 +1694,13 @@
         }
         args->na = (int *)(args->stk_base + args->stk_size) - args->stk_offp;
         args->ne = args->na - argc;
 
         /*
-         * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME, and
-         * AT_SUN_EMULATOR strings to the stack.
+         * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME,
+         * AT_SUN_BRAND_NROOT, and AT_SUN_EMULATOR strings, as well as AT_RANDOM
+         * array, to the stack.
          */
         if (auxvpp != NULL && *auxvpp != NULL) {
                 if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0)
                         return (error);
                 if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0)

@@ -1647,10 +1709,24 @@
                     (error = stk_add(args, args->brandname, UIO_SYSSPACE)) != 0)
                         return (error);
                 if (args->emulator != NULL &&
                     (error = stk_add(args, args->emulator, UIO_SYSSPACE)) != 0)
                         return (error);
+
+                /*
+                 * For the AT_RANDOM aux vector we provide 16 bytes of random
+                 * data.
+                 */
+                (void) random_get_pseudo_bytes(rdata, sizeof (rdata));
+
+                if ((error = stk_byte_add(args, rdata, sizeof (rdata))) != 0)
+                        return (error);
+
+                if (args->brand_nroot != NULL &&
+                    (error = stk_add(args, args->brand_nroot,
+                    UIO_SYSSPACE)) != 0)
+                        return (error);
         }
 
         /*
          * Compute the size of the stack.  This includes all the pointers,
          * the space reserved for the aux vector, and all the strings.

@@ -1753,11 +1829,11 @@
                 return (-1);
 
         /*
          * Fill in the aux vector now that we know the user stack addresses
          * for the AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME and
-         * AT_SUN_EMULATOR strings.
+         * AT_SUN_EMULATOR strings, the AT_RANDOM array, and AT_SUN_BRAND_NROOT.
          */
         if (auxvpp != NULL && *auxvpp != NULL) {
                 if (args->to_model == DATAMODEL_NATIVE) {
                         auxv_t **a = (auxv_t **)auxvpp;
                         ADDAUX(*a, AT_SUN_PLATFORM, (long)&ustrp[*--offp])

@@ -1766,10 +1842,15 @@
                                 ADDAUX(*a,
                                     AT_SUN_BRANDNAME, (long)&ustrp[*--offp])
                         if (args->emulator != NULL)
                                 ADDAUX(*a,
                                     AT_SUN_EMULATOR, (long)&ustrp[*--offp])
+                        ADDAUX(*a, AT_RANDOM, (long)&ustrp[*--offp])
+                        if (args->brand_nroot != NULL) {
+                                ADDAUX(*a,
+                                    AT_SUN_BRAND_NROOT, (long)&ustrp[*--offp])
+                        }
                 } else {
                         auxv32_t **a = (auxv32_t **)auxvpp;
                         ADDAUX(*a,
                             AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp])
                         ADDAUX(*a,

@@ -1778,12 +1859,17 @@
                                 ADDAUX(*a, AT_SUN_BRANDNAME,
                                     (int)(uintptr_t)&ustrp[*--offp])
                         if (args->emulator != NULL)
                                 ADDAUX(*a, AT_SUN_EMULATOR,
                                     (int)(uintptr_t)&ustrp[*--offp])
+                        ADDAUX(*a, AT_RANDOM, (int)(uintptr_t)&ustrp[*--offp])
+                        if (args->brand_nroot != NULL) {
+                                ADDAUX(*a, AT_SUN_BRAND_NROOT,
+                                    (int)(uintptr_t)&ustrp[*--offp])
                 }
         }
+        }
 
         return (0);
 }
 
 /*

@@ -1866,10 +1952,13 @@
                 args->ncargs = NCARGS32;
                 args->stk_align = STACK_ALIGN32;
                 usrstack = (char *)USRSTACK32;
         }
 
+        if (args->maxstack != 0 && (uintptr_t)usrstack > args->maxstack)
+                usrstack = (char *)args->maxstack;
+
         ASSERT(P2PHASE((uintptr_t)usrstack, args->stk_align) == 0);
 
 #if defined(__sparc)
         /*
          * Make sure user register windows are empty before