Print this page
Reduce lint
OS-4818 contract template disappears on exec
OS-4460 exec brands processes that still have multiple threads
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-3742 lxbrand add support for signalfd
OS-4382 remove obsolete brand hooks added during lx development
OS-4188 NULL dereference in lwp_hash_in
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4119 lxbrand panic when running native perl inside lx zone
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4151 setbrand hooks should be sane during fork
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4129 lxbrand should not abuse p_brand_data for storing exit signal
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-3561 lxbrand emulation library should execute on alternate stack
OS-3558 lxbrand add support for full in-kernel syscall handling
OS-3545 lx_syscall_regs should not walk stack
OS-3868 many LTP testcases now hang
OS-3901 lxbrand lx_recvmsg fails to translate control messages when 64-bit
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-3871 AT_RANDOM aux entry should be populated using random_get_pseudo_bytes
OS-3611 lx brand: 64-bit processes should not use VAs above VA hole
OS-3438 lx brand: "start rsyslog" hangs
OS-3280 need a way to specify the root of a native system in the lx brand
OS-3279 lx brand should allow delegated datasets
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-2949 add support for AT_RANDOM aux vector entry
        
*** 24,34 ****
   */
  
  /*      Copyright (c) 1988 AT&T */
  /*        All Rights Reserved   */
  /*
!  * Copyright 2014, Joyent, Inc.  All rights reserved.
   */
  
  #include <sys/types.h>
  #include <sys/param.h>
  #include <sys/sysmacros.h>
--- 24,34 ----
   */
  
  /*      Copyright (c) 1988 AT&T */
  /*        All Rights Reserved   */
  /*
!  * Copyright 2015, Joyent, Inc.  All rights reserved.
   */
  
  #include <sys/types.h>
  #include <sys/param.h>
  #include <sys/sysmacros.h>
*** 67,76 ****
--- 67,77 ----
  #include <sys/lwpchan_impl.h>
  #include <sys/pool.h>
  #include <sys/sdt.h>
  #include <sys/brand.h>
  #include <sys/klpd.h>
+ #include <sys/random.h>
  
  #include <c2/audit.h>
  
  #include <vm/hat.h>
  #include <vm/anon.h>
*** 95,104 ****
--- 96,106 ----
  uint_t auxv_hwcap32 = 0;        /* 32-bit version of auxv_hwcap */
  uint_t auxv_hwcap32_2 = 0;      /* 32-bit version of auxv_hwcap2 */
  #endif
  
  #define PSUIDFLAGS              (SNOCD|SUGID)
+ #define RANDOM_LEN      16      /* 16 bytes for AT_RANDOM aux entry */
  
  /*
   * exece() - system call wrapper around exec_common()
   */
  int
*** 295,312 ****
  
          ua.fname = fname;
          ua.argp = argp;
          ua.envp = envp;
  
!         /* If necessary, brand this process before we start the exec. */
!         if (brandme)
!                 brand_setbrand(p);
  
          if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz,
!             exec_file, p->p_cred, brand_action)) != 0) {
!                 if (brandme)
!                         brand_clearbrand(p, B_FALSE);
                  VN_RELE(vp);
                  if (dir != NULL)
                          VN_RELE(dir);
                  pn_free(&resolvepn);
                  goto fail;
--- 297,343 ----
  
          ua.fname = fname;
          ua.argp = argp;
          ua.envp = envp;
  
!         /* If necessary, brand this process/lwp before we start the exec. */
!         if (brandme) {
!                 void *brand_data = NULL;
  
+                 /*
+                  * Process branding may fail if multiple LWPs are present and
+                  * holdlwps() cannot complete successfully.
+                  */
+                 error = brand_setbrand(p, B_TRUE);
+ 
+                 if (error == 0 && BROP(p)->b_lwpdata_alloc != NULL) {
+                         brand_data = BROP(p)->b_lwpdata_alloc(p);
+                         if (brand_data == NULL) {
+                                 error = 1;
+                         }
+                 }
+ 
+                 if (error == 0) {
+                         mutex_enter(&p->p_lock);
+                         BROP(p)->b_initlwp(lwp, brand_data);
+                         mutex_exit(&p->p_lock);
+                 } else {
+                         VN_RELE(vp);
+                         if (dir != NULL) {
+                                 VN_RELE(dir);
+                         }
+                         pn_free(&resolvepn);
+                         goto fail;
+                 }
+         }
+ 
          if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz,
!             exec_file, p->p_cred, &brand_action)) != 0) {
!                 if (brandme) {
!                         BROP(p)->b_freelwp(lwp);
!                         brand_clearbrand(p, B_TRUE);
!                 }
                  VN_RELE(vp);
                  if (dir != NULL)
                          VN_RELE(dir);
                  pn_free(&resolvepn);
                  goto fail;
*** 334,344 ****
          curthread->t_predcache = NULL;
  
          /*
           * Clear contract template state
           */
!         lwp_ctmpl_clear(lwp);
  
          /*
           * Save the directory in which we found the executable for expanding
           * the %d token used in core file patterns.
           */
--- 365,375 ----
          curthread->t_predcache = NULL;
  
          /*
           * Clear contract template state
           */
!         lwp_ctmpl_clear(lwp, B_TRUE);
  
          /*
           * Save the directory in which we found the executable for expanding
           * the %d token used in core file patterns.
           */
*** 358,367 ****
--- 389,400 ----
           * restart system calls; the new program's environment should
           * not be affected by detritus from the old program.  Any
           * pending held signals remain held, so don't clear t_hold.
           */
          mutex_enter(&p->p_lock);
+         DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp,
+             uintptr_t, lwp->lwp_oldcontext, uintptr_t, 0);
          lwp->lwp_oldcontext = 0;
          lwp->lwp_ustack = 0;
          lwp->lwp_old_stk_ctl = 0;
          sigemptyset(&up->u_signodefer);
          sigemptyset(&up->u_sigonstack);
*** 417,428 ****
           */
          close_exec(P_FINFO(p));
          TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up);
  
          /* Unbrand ourself if necessary. */
!         if (PROC_IS_BRANDED(p) && (brand_action == EBA_NATIVE))
                  brand_clearbrand(p, B_FALSE);
  
          setregs(&args);
  
          /* Mark this as an executable vnode */
          mutex_enter(&vp->v_lock);
--- 450,463 ----
           */
          close_exec(P_FINFO(p));
          TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up);
  
          /* Unbrand ourself if necessary. */
!         if (PROC_IS_BRANDED(p) && (brand_action == EBA_NATIVE)) {
!                 BROP(p)->b_freelwp(lwp);
                  brand_clearbrand(p, B_FALSE);
+         }
  
          setregs(&args);
  
          /* Mark this as an executable vnode */
          mutex_enter(&vp->v_lock);
*** 542,552 ****
          struct intpdata *idatap,
          int level,
          long *execsz,
          caddr_t exec_file,
          struct cred *cred,
!         int brand_action)
  {
          struct vnode *vp, *execvp = NULL;
          proc_t *pp = ttoproc(curthread);
          struct execsw *eswp;
          int error = 0;
--- 577,587 ----
          struct intpdata *idatap,
          int level,
          long *execsz,
          caddr_t exec_file,
          struct cred *cred,
!         int *brand_action)
  {
          struct vnode *vp, *execvp = NULL;
          proc_t *pp = ttoproc(curthread);
          struct execsw *eswp;
          int error = 0;
*** 856,867 ****
                           * invalidate the associated /proc vnode.
                           */
                          if (pp->p_plist || (pp->p_proc_flag & P_PR_TRACE))
                                  args->traceinval = 1;
                  }
!                 if (pp->p_proc_flag & P_PR_PTRACE)
                          psignal(pp, SIGTRAP);
                  if (args->traceinval)
                          prinvalidate(&pp->p_user);
          }
          if (execvp)
                  VN_RELE(execvp);
--- 891,908 ----
                           * invalidate the associated /proc vnode.
                           */
                          if (pp->p_plist || (pp->p_proc_flag & P_PR_TRACE))
                                  args->traceinval = 1;
                  }
! 
!                 /*
!                  * If legacy ptrace is enabled, generate the SIGTRAP.
!                  */
!                 if (pp->p_proc_flag & P_PR_PTRACE) {
                          psignal(pp, SIGTRAP);
+                 }
+ 
                  if (args->traceinval)
                          prinvalidate(&pp->p_user);
          }
          if (execvp)
                  VN_RELE(execvp);
*** 1515,1525 ****
--- 1556,1585 ----
          args->stk_strp += len;
  
          return (0);
  }
  
+ /*
+  * Add a fixed size byte array to the stack (only from kernel space).
+  *
+  * The data is copied with bcopy(), so 'sp' must be a kernel address.  Exactly
+  * 'len' bytes are copied; no terminator is appended.  This is the routine
+  * used to place the random bytes backing the AT_RANDOM aux entry.
+  *
+  * Before the copy, the offset of the data relative to args->stk_base is
+  * stored in the next slot below args->stk_offp (the offset array grows
+  * downward).  After the copy, args->stk_strp is advanced by 'len'.
+  *
+  * Returns 0 on success.  Returns E2BIG if STK_AVAIL(args) is smaller than
+  * sizeof (int) (no room for another offset) or smaller than 'len'.  Note
+  * that in the second case the offset slot has already been consumed when
+  * the error is returned.
+  */
  static int
+ stk_byte_add(uarg_t *args, const uint8_t *sp, size_t len)
+ {
+         if (STK_AVAIL(args) < sizeof (int))
+                 return (E2BIG);
+         *--args->stk_offp = args->stk_strp - args->stk_base;
+ 
+         if (len > STK_AVAIL(args))
+                 return (E2BIG);
+         bcopy(sp, args->stk_strp, len);
+ 
+         args->stk_strp += len;
+ 
+         return (0);
+ }
+ 
+ static int
  stk_getptr(uarg_t *args, char *src, char **dst)
  {
          int error;
  
          if (args->from_model == DATAMODEL_NATIVE) {
*** 1551,1560 ****
--- 1611,1621 ----
          int argv_empty = 0;
          size_t ptrsize = args->from_ptrsize;
          size_t size, pad;
          char *argv = (char *)uap->argp;
          char *envp = (char *)uap->envp;
+         uint8_t rdata[RANDOM_LEN];
  
          /*
           * Copy interpreter's name and argument to argv[0] and argv[1].
           * In the rare case that we have nested interpreters then those names
           * and arguments are also copied to the subsequent slots in argv.
*** 1633,1644 ****
          }
          args->na = (int *)(args->stk_base + args->stk_size) - args->stk_offp;
          args->ne = args->na - argc;
  
          /*
!          * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME, and
!          * AT_SUN_EMULATOR strings to the stack.
           */
          if (auxvpp != NULL && *auxvpp != NULL) {
                  if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0)
                          return (error);
                  if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0)
--- 1694,1706 ----
          }
          args->na = (int *)(args->stk_base + args->stk_size) - args->stk_offp;
          args->ne = args->na - argc;
  
          /*
!          * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME,
!          * AT_SUN_BRAND_NROOT, and AT_SUN_EMULATOR strings, as well as AT_RANDOM
!          * array, to the stack.
           */
          if (auxvpp != NULL && *auxvpp != NULL) {
                  if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0)
                          return (error);
                  if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0)
*** 1647,1656 ****
--- 1709,1732 ----
                      (error = stk_add(args, args->brandname, UIO_SYSSPACE)) != 0)
                          return (error);
                  if (args->emulator != NULL &&
                      (error = stk_add(args, args->emulator, UIO_SYSSPACE)) != 0)
                          return (error);
+ 
+                 /*
+                  * For the AT_RANDOM aux vector we provide 16 bytes of random
+                  * data.
+                  */
+                 (void) random_get_pseudo_bytes(rdata, sizeof (rdata));
+ 
+                 if ((error = stk_byte_add(args, rdata, sizeof (rdata))) != 0)
+                         return (error);
+ 
+                 if (args->brand_nroot != NULL &&
+                     (error = stk_add(args, args->brand_nroot,
+                     UIO_SYSSPACE)) != 0)
+                         return (error);
          }
  
          /*
           * Compute the size of the stack.  This includes all the pointers,
           * the space reserved for the aux vector, and all the strings.
*** 1753,1763 ****
                  return (-1);
  
          /*
           * Fill in the aux vector now that we know the user stack addresses
           * for the AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME and
!          * AT_SUN_EMULATOR strings.
           */
          if (auxvpp != NULL && *auxvpp != NULL) {
                  if (args->to_model == DATAMODEL_NATIVE) {
                          auxv_t **a = (auxv_t **)auxvpp;
                          ADDAUX(*a, AT_SUN_PLATFORM, (long)&ustrp[*--offp])
--- 1829,1839 ----
                  return (-1);
  
          /*
           * Fill in the aux vector now that we know the user stack addresses
           * for the AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME and
!          * AT_SUN_EMULATOR strings, as well as the AT_RANDOM array and the
!          * AT_SUN_BRAND_NROOT string.
           */
          if (auxvpp != NULL && *auxvpp != NULL) {
                  if (args->to_model == DATAMODEL_NATIVE) {
                          auxv_t **a = (auxv_t **)auxvpp;
                          ADDAUX(*a, AT_SUN_PLATFORM, (long)&ustrp[*--offp])
*** 1766,1775 ****
--- 1842,1856 ----
                                  ADDAUX(*a,
                                      AT_SUN_BRANDNAME, (long)&ustrp[*--offp])
                          if (args->emulator != NULL)
                                  ADDAUX(*a,
                                      AT_SUN_EMULATOR, (long)&ustrp[*--offp])
+                         ADDAUX(*a, AT_RANDOM, (long)&ustrp[*--offp])
+                         if (args->brand_nroot != NULL) {
+                                 ADDAUX(*a,
+                                     AT_SUN_BRAND_NROOT, (long)&ustrp[*--offp])
+                         }
                  } else {
                          auxv32_t **a = (auxv32_t **)auxvpp;
                          ADDAUX(*a,
                              AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp])
                          ADDAUX(*a,
*** 1778,1789 ****
--- 1859,1875 ----
                                  ADDAUX(*a, AT_SUN_BRANDNAME,
                                      (int)(uintptr_t)&ustrp[*--offp])
                          if (args->emulator != NULL)
                                  ADDAUX(*a, AT_SUN_EMULATOR,
                                      (int)(uintptr_t)&ustrp[*--offp])
+                         ADDAUX(*a, AT_RANDOM, (int)(uintptr_t)&ustrp[*--offp])
+                         if (args->brand_nroot != NULL) {
+                                 ADDAUX(*a, AT_SUN_BRAND_NROOT,
+                                     (int)(uintptr_t)&ustrp[*--offp])
                          }
                  }
+         }
  
          return (0);
  }
  
  /*
*** 1866,1875 ****
--- 1952,1964 ----
                  args->ncargs = NCARGS32;
                  args->stk_align = STACK_ALIGN32;
                  usrstack = (char *)USRSTACK32;
          }
  
+         if (args->maxstack != 0 && (uintptr_t)usrstack > args->maxstack)
+                 usrstack = (char *)args->maxstack;
+ 
          ASSERT(P2PHASE((uintptr_t)usrstack, args->stk_align) == 0);
  
  #if defined(__sparc)
          /*
           * Make sure user register windows are empty before