Print this page
Reduce lint
OS-4818 contract template disappears on exec
OS-4460 exec brands processes that still have multiple threads
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-3742 lxbrand add support for signalfd
OS-4382 remove obsolete brand hooks added during lx development
OS-4188 NULL dereference in lwp_hash_in
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4119 lxbrand panic when running native perl inside lx zone
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4151 setbrand hooks should be sane during fork
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4129 lxbrand should not abuse p_brand_data for storing exit signal
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-3561 lxbrand emulation library should execute on alternate stack
OS-3558 lxbrand add support for full in-kernel syscall handling
OS-3545 lx_syscall_regs should not walk stack
OS-3868 many LTP testcases now hang
OS-3901 lxbrand lx_recvmsg fails to translate control messages when 64-bit
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-3871 AT_RANDOM aux entry should be populated using random_get_pseudo_bytes
OS-3611 lx brand: 64-bit processes should not use VAs above VA hole
OS-3438 lx brand: "start rsyslog" hangs
OS-3280 need a way to specify the root of a native system in the lx brand
OS-3279 lx brand should allow delegated datasets
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-2949 add support for AT_RANDOM aux vector entry

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/exec.c
          +++ new/usr/src/uts/common/os/exec.c
↓ open down ↓ 18 lines elided ↑ open up ↑
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*      Copyright (c) 1988 AT&T */
  27   27  /*        All Rights Reserved   */
  28   28  /*
  29      - * Copyright 2014, Joyent, Inc.  All rights reserved.
       29 + * Copyright 2015, Joyent, Inc.  All rights reserved.
  30   30   */
  31   31  
  32   32  #include <sys/types.h>
  33   33  #include <sys/param.h>
  34   34  #include <sys/sysmacros.h>
  35   35  #include <sys/systm.h>
  36   36  #include <sys/signal.h>
  37   37  #include <sys/cred_impl.h>
  38   38  #include <sys/policy.h>
  39   39  #include <sys/user.h>
↓ open down ↓ 22 lines elided ↑ open up ↑
  62   62  #include <sys/utrap.h>
  63   63  #include <sys/systeminfo.h>
  64   64  #include <sys/stack.h>
  65   65  #include <sys/rctl.h>
  66   66  #include <sys/dtrace.h>
  67   67  #include <sys/lwpchan_impl.h>
  68   68  #include <sys/pool.h>
  69   69  #include <sys/sdt.h>
  70   70  #include <sys/brand.h>
  71   71  #include <sys/klpd.h>
       72 +#include <sys/random.h>
  72   73  
  73   74  #include <c2/audit.h>
  74   75  
  75   76  #include <vm/hat.h>
  76   77  #include <vm/anon.h>
  77   78  #include <vm/as.h>
  78   79  #include <vm/seg.h>
  79   80  #include <vm/seg_vn.h>
  80   81  
  81   82  #define PRIV_RESET              0x01    /* needs to reset privs */
↓ open down ↓ 8 lines elided ↑ open up ↑
  90   91  static int hold_execsw(struct execsw *);
  91   92  
  92   93  uint_t auxv_hwcap = 0;  /* auxv AT_SUN_HWCAP value; determined on the fly */
  93   94  uint_t auxv_hwcap_2 = 0;        /* AT_SUN_HWCAP2 */
  94   95  #if defined(_SYSCALL32_IMPL)
  95   96  uint_t auxv_hwcap32 = 0;        /* 32-bit version of auxv_hwcap */
  96   97  uint_t auxv_hwcap32_2 = 0;      /* 32-bit version of auxv_hwcap2 */
  97   98  #endif
  98   99  
  99  100  #define PSUIDFLAGS              (SNOCD|SUGID)
      101 +#define RANDOM_LEN      16      /* 16 bytes for AT_RANDOM aux entry */
 100  102  
 101  103  /*
 102  104   * exece() - system call wrapper around exec_common()
 103  105   */
 104  106  int
 105  107  exece(const char *fname, const char **argp, const char **envp)
 106  108  {
 107  109          int error;
 108  110  
 109  111          error = exec_common(fname, argp, envp, EBA_NONE);
↓ open down ↓ 180 lines elided ↑ open up ↑
 290  292          args.stk_prot = PROT_ZFOD;
 291  293          args.dat_prot = PROT_ZFOD;
 292  294  
 293  295          CPU_STATS_ADD_K(sys, sysexec, 1);
 294  296          DTRACE_PROC1(exec, char *, args.pathname);
 295  297  
 296  298          ua.fname = fname;
 297  299          ua.argp = argp;
 298  300          ua.envp = envp;
 299  301  
 300      -        /* If necessary, brand this process before we start the exec. */
 301      -        if (brandme)
 302      -                brand_setbrand(p);
      302 +        /* If necessary, brand this process/lwp before we start the exec. */
      303 +        if (brandme) {
      304 +                void *brand_data = NULL;
 303  305  
      306 +                /*
      307 +                 * Process branding may fail if multiple LWPs are present and
      308 +                 * holdlwps() cannot complete successfully.
      309 +                 */
      310 +                error = brand_setbrand(p, B_TRUE);
      311 +
      312 +                if (error == 0 && BROP(p)->b_lwpdata_alloc != NULL) {
      313 +                        brand_data = BROP(p)->b_lwpdata_alloc(p);
      314 +                        if (brand_data == NULL) {
      315 +                                error = 1;
      316 +                        }
      317 +                }
      318 +
      319 +                if (error == 0) {
      320 +                        mutex_enter(&p->p_lock);
      321 +                        BROP(p)->b_initlwp(lwp, brand_data);
      322 +                        mutex_exit(&p->p_lock);
      323 +                } else {
      324 +                        VN_RELE(vp);
      325 +                        if (dir != NULL) {
      326 +                                VN_RELE(dir);
      327 +                        }
      328 +                        pn_free(&resolvepn);
      329 +                        goto fail;
      330 +                }
      331 +        }
      332 +
 304  333          if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz,
 305      -            exec_file, p->p_cred, brand_action)) != 0) {
 306      -                if (brandme)
 307      -                        brand_clearbrand(p, B_FALSE);
      334 +            exec_file, p->p_cred, &brand_action)) != 0) {
      335 +                if (brandme) {
      336 +                        BROP(p)->b_freelwp(lwp);
      337 +                        brand_clearbrand(p, B_TRUE);
      338 +                }
 308  339                  VN_RELE(vp);
 309  340                  if (dir != NULL)
 310  341                          VN_RELE(dir);
 311  342                  pn_free(&resolvepn);
 312  343                  goto fail;
 313  344          }
 314  345  
 315  346          /*
 316  347           * Free floating point registers (sun4u only)
 317  348           */
↓ open down ↓ 11 lines elided ↑ open up ↑
 329  360          /*
 330  361           * Remember file name for accounting; clear any cached DTrace predicate.
 331  362           */
 332  363          up->u_acflag &= ~AFORK;
 333  364          bcopy(exec_file, up->u_comm, MAXCOMLEN+1);
 334  365          curthread->t_predcache = NULL;
 335  366  
 336  367          /*
 337  368           * Clear contract template state
 338  369           */
 339      -        lwp_ctmpl_clear(lwp);
      370 +        lwp_ctmpl_clear(lwp, B_TRUE);
 340  371  
 341  372          /*
 342  373           * Save the directory in which we found the executable for expanding
 343  374           * the %d token used in core file patterns.
 344  375           */
 345  376          mutex_enter(&p->p_lock);
 346  377          tmpvp = p->p_execdir;
 347  378          p->p_execdir = dir;
 348  379          if (p->p_execdir != NULL)
 349  380                  VN_HOLD(p->p_execdir);
↓ open down ↓ 3 lines elided ↑ open up ↑
 353  384                  VN_RELE(tmpvp);
 354  385  
 355  386          /*
 356  387           * Reset stack state to the user stack, clear set of signals
 357  388           * caught on the signal stack, and reset list of signals that
 358  389           * restart system calls; the new program's environment should
 359  390           * not be affected by detritus from the old program.  Any
 360  391           * pending held signals remain held, so don't clear t_hold.
 361  392           */
 362  393          mutex_enter(&p->p_lock);
      394 +        DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp,
      395 +            uintptr_t, lwp->lwp_oldcontext, uintptr_t, 0);
 363  396          lwp->lwp_oldcontext = 0;
 364  397          lwp->lwp_ustack = 0;
 365  398          lwp->lwp_old_stk_ctl = 0;
 366  399          sigemptyset(&up->u_signodefer);
 367  400          sigemptyset(&up->u_sigonstack);
 368  401          sigemptyset(&up->u_sigresethand);
 369  402          lwp->lwp_sigaltstack.ss_sp = 0;
 370  403          lwp->lwp_sigaltstack.ss_size = 0;
 371  404          lwp->lwp_sigaltstack.ss_flags = SS_DISABLE;
 372  405  
↓ open down ↓ 39 lines elided ↑ open up ↑
 412  445                  utrap_free(p);
 413  446  #endif  /* __sparc */
 414  447  
 415  448          /*
 416  449           * Close all close-on-exec files.
 417  450           */
 418  451          close_exec(P_FINFO(p));
 419  452          TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up);
 420  453  
 421  454          /* Unbrand ourself if necessary. */
 422      -        if (PROC_IS_BRANDED(p) && (brand_action == EBA_NATIVE))
      455 +        if (PROC_IS_BRANDED(p) && (brand_action == EBA_NATIVE)) {
      456 +                BROP(p)->b_freelwp(lwp);
 423  457                  brand_clearbrand(p, B_FALSE);
      458 +        }
 424  459  
 425  460          setregs(&args);
 426  461  
 427  462          /* Mark this as an executable vnode */
 428  463          mutex_enter(&vp->v_lock);
 429  464          vp->v_flag |= VVMEXEC;
 430  465          mutex_exit(&vp->v_lock);
 431  466  
 432  467          VN_RELE(vp);
 433  468          if (dir != NULL)
↓ open down ↓ 103 lines elided ↑ open up ↑
 537  572  int
 538  573  gexec(
 539  574          struct vnode **vpp,
 540  575          struct execa *uap,
 541  576          struct uarg *args,
 542  577          struct intpdata *idatap,
 543  578          int level,
 544  579          long *execsz,
 545  580          caddr_t exec_file,
 546  581          struct cred *cred,
 547      -        int brand_action)
      582 +        int *brand_action)
 548  583  {
 549  584          struct vnode *vp, *execvp = NULL;
 550  585          proc_t *pp = ttoproc(curthread);
 551  586          struct execsw *eswp;
 552  587          int error = 0;
 553  588          int suidflags = 0;
 554  589          ssize_t resid;
 555  590          uid_t uid, gid;
 556  591          struct vattr vattr;
 557  592          char magbuf[MAGIC_BYTES];
↓ open down ↓ 293 lines elided ↑ open up ↑
 851  886                          mutex_exit(&pp->p_lock);
 852  887                  }
 853  888                  if (setid && (pp->p_proc_flag & P_PR_PTRACE) == 0) {
 854  889                          /*
 855  890                           * If process is traced via /proc, arrange to
 856  891                           * invalidate the associated /proc vnode.
 857  892                           */
 858  893                          if (pp->p_plist || (pp->p_proc_flag & P_PR_TRACE))
 859  894                                  args->traceinval = 1;
 860  895                  }
 861      -                if (pp->p_proc_flag & P_PR_PTRACE)
      896 +
      897 +                /*
      898 +                 * If legacy ptrace is enabled, generate the SIGTRAP.
      899 +                 */
      900 +                if (pp->p_proc_flag & P_PR_PTRACE) {
 862  901                          psignal(pp, SIGTRAP);
      902 +                }
      903 +
 863  904                  if (args->traceinval)
 864  905                          prinvalidate(&pp->p_user);
 865  906          }
 866  907          if (execvp)
 867  908                  VN_RELE(execvp);
 868  909          return (0);
 869  910  
 870  911  bad:
 871  912          (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, cred, NULL);
 872  913  
↓ open down ↓ 637 lines elided ↑ open up ↑
1510 1551                  if (len > STK_AVAIL(args))
1511 1552                          return (E2BIG);
1512 1553                  bcopy(sp, args->stk_strp, len);
1513 1554          }
1514 1555  
1515 1556          args->stk_strp += len;
1516 1557  
1517 1558          return (0);
1518 1559  }
1519 1560  
     1561 +/*
     1562 + * Add a fixed size byte array to the stack (only from kernel space).
          + *
          + * Unlike a string, the data is copied verbatim: exactly len bytes are
          + * taken from sp with bcopy() and nothing is appended after them.
          + * Before the copy, the offset of the data's destination relative to
          + * args->stk_base is stored in the next slot below args->stk_offp
          + * (which is pre-decremented).  Afterwards args->stk_strp is advanced
          + * past the copied bytes.
          + *
          + * Returns 0 on success, or E2BIG if STK_AVAIL() reports too little room
          + * for the offset entry or for the data itself.  Note that in the
          + * latter case the offset entry has already been consumed.
     1563 + */
1520 1564  static int
     1565 +stk_byte_add(uarg_t *args, const uint8_t *sp, size_t len)
     1566 +{
     1567 +        if (STK_AVAIL(args) < sizeof (int))
     1568 +                return (E2BIG);
     1569 +        *--args->stk_offp = args->stk_strp - args->stk_base;
     1570 +
     1571 +        if (len > STK_AVAIL(args))
     1572 +                return (E2BIG);
     1573 +        bcopy(sp, args->stk_strp, len);
     1574 +
     1575 +        args->stk_strp += len;
     1576 +
     1577 +        return (0);
     1578 +}
     1579 +
     1580 +static int
1521 1581  stk_getptr(uarg_t *args, char *src, char **dst)
1522 1582  {
1523 1583          int error;
1524 1584  
1525 1585          if (args->from_model == DATAMODEL_NATIVE) {
1526 1586                  ulong_t ptr;
1527 1587                  error = fulword(src, &ptr);
1528 1588                  *dst = (caddr_t)ptr;
1529 1589          } else {
1530 1590                  uint32_t ptr;
↓ open down ↓ 15 lines elided ↑ open up ↑
1546 1606  static int
1547 1607  stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
1548 1608  {
1549 1609          char *sp;
1550 1610          int argc, error;
1551 1611          int argv_empty = 0;
1552 1612          size_t ptrsize = args->from_ptrsize;
1553 1613          size_t size, pad;
1554 1614          char *argv = (char *)uap->argp;
1555 1615          char *envp = (char *)uap->envp;
     1616 +        uint8_t rdata[RANDOM_LEN];
1556 1617  
1557 1618          /*
1558 1619           * Copy interpreter's name and argument to argv[0] and argv[1].
1559 1620           * In the rare case that we have nested interpreters then those names
1560 1621           * and arguments are also copied to the subsequent slots in argv.
1561 1622           */
1562 1623          if (intp != NULL && intp->intp_name[0] != NULL) {
1563 1624                  int i;
1564 1625  
1565 1626                  for (i = 0; i < INTP_MAXDEPTH; i++) {
↓ open down ↓ 62 lines elided ↑ open up ↑
1628 1689                                  args->stk_strp = tmp;
1629 1690                                  *(args->stk_offp++) = NULL;
1630 1691                          }
1631 1692                          envp += ptrsize;
1632 1693                  }
1633 1694          }
1634 1695          args->na = (int *)(args->stk_base + args->stk_size) - args->stk_offp;
1635 1696          args->ne = args->na - argc;
1636 1697  
1637 1698          /*
1638      -         * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME, and
1639      -         * AT_SUN_EMULATOR strings to the stack.
     1699 +         * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME,
     1700 +         * AT_SUN_BRAND_NROOT, and AT_SUN_EMULATOR strings, as well as the
     1701 +         * AT_RANDOM array, to the stack.
1640 1702           */
1641 1703          if (auxvpp != NULL && *auxvpp != NULL) {
1642 1704                  if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0)
1643 1705                          return (error);
1644 1706                  if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0)
1645 1707                          return (error);
1646 1708                  if (args->brandname != NULL &&
1647 1709                      (error = stk_add(args, args->brandname, UIO_SYSSPACE)) != 0)
1648 1710                          return (error);
1649 1711                  if (args->emulator != NULL &&
1650 1712                      (error = stk_add(args, args->emulator, UIO_SYSSPACE)) != 0)
1651 1713                          return (error);
     1714 +
     1715 +                /*
     1716 +                 * For the AT_RANDOM aux vector we provide 16 bytes of random
     1717 +                 * data.
     1718 +                 */
     1719 +                (void) random_get_pseudo_bytes(rdata, sizeof (rdata));
     1720 +
     1721 +                if ((error = stk_byte_add(args, rdata, sizeof (rdata))) != 0)
     1722 +                        return (error);
     1723 +
     1724 +                if (args->brand_nroot != NULL &&
     1725 +                    (error = stk_add(args, args->brand_nroot,
     1726 +                    UIO_SYSSPACE)) != 0)
     1727 +                        return (error);
1652 1728          }
1653 1729  
1654 1730          /*
1655 1731           * Compute the size of the stack.  This includes all the pointers,
1656 1732           * the space reserved for the aux vector, and all the strings.
1657 1733           * The total number of pointers is args->na (which is argc + envc)
1658 1734           * plus 4 more: (1) a pointer's worth of space for argc; (2) the NULL
1659 1735           * after the last argument (i.e. argv[argc]); (3) the NULL after the
1660 1736           * last environment variable (i.e. envp[envc]); and (4) the NULL after
1661 1737           * all the strings, at the very top of the stack.
↓ open down ↓ 86 lines elided ↑ open up ↑
1748 1824  
1749 1825          /*
1750 1826           * Put all the argv[], envp[], and auxv strings on the stack.
1751 1827           */
1752 1828          if (copyout(args->stk_base, ustrp, args->nc))
1753 1829                  return (-1);
1754 1830  
1755 1831          /*
1756 1832           * Fill in the aux vector now that we know the user stack addresses
1757 1833           * for the AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME and
1758      -         * AT_SUN_EMULATOR strings.
     1834 +         * AT_SUN_EMULATOR strings, the AT_RANDOM array, and AT_SUN_BRAND_NROOT.
1759 1835           */
1760 1836          if (auxvpp != NULL && *auxvpp != NULL) {
1761 1837                  if (args->to_model == DATAMODEL_NATIVE) {
1762 1838                          auxv_t **a = (auxv_t **)auxvpp;
1763 1839                          ADDAUX(*a, AT_SUN_PLATFORM, (long)&ustrp[*--offp])
1764 1840                          ADDAUX(*a, AT_SUN_EXECNAME, (long)&ustrp[*--offp])
1765 1841                          if (args->brandname != NULL)
1766 1842                                  ADDAUX(*a,
1767 1843                                      AT_SUN_BRANDNAME, (long)&ustrp[*--offp])
1768 1844                          if (args->emulator != NULL)
1769 1845                                  ADDAUX(*a,
1770 1846                                      AT_SUN_EMULATOR, (long)&ustrp[*--offp])
     1847 +                        ADDAUX(*a, AT_RANDOM, (long)&ustrp[*--offp])
     1848 +                        if (args->brand_nroot != NULL) {
     1849 +                                ADDAUX(*a,
     1850 +                                    AT_SUN_BRAND_NROOT, (long)&ustrp[*--offp])
     1851 +                        }
1771 1852                  } else {
1772 1853                          auxv32_t **a = (auxv32_t **)auxvpp;
1773 1854                          ADDAUX(*a,
1774 1855                              AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp])
1775 1856                          ADDAUX(*a,
1776 1857                              AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp])
1777 1858                          if (args->brandname != NULL)
1778 1859                                  ADDAUX(*a, AT_SUN_BRANDNAME,
1779 1860                                      (int)(uintptr_t)&ustrp[*--offp])
1780 1861                          if (args->emulator != NULL)
1781 1862                                  ADDAUX(*a, AT_SUN_EMULATOR,
1782 1863                                      (int)(uintptr_t)&ustrp[*--offp])
     1864 +                        ADDAUX(*a, AT_RANDOM, (int)(uintptr_t)&ustrp[*--offp])
     1865 +                        if (args->brand_nroot != NULL) {
     1866 +                                ADDAUX(*a, AT_SUN_BRAND_NROOT,
     1867 +                                    (int)(uintptr_t)&ustrp[*--offp])
     1868 +                        }
1783 1869                  }
1784 1870          }
1785 1871  
1786 1872          return (0);
1787 1873  }
1788 1874  
1789 1875  /*
1790 1876   * Initialize a new user stack with the specified arguments and environment.
1791 1877   * The initial user stack layout is as follows:
1792 1878   *
↓ open down ↓ 68 lines elided ↑ open up ↑
1861 1947                          usrstack = (char *)USRSTACK64_32;
1862 1948                  else
1863 1949                          usrstack = (char *)USRSTACK;
1864 1950          } else {
1865 1951                  args->to_ptrsize = sizeof (int32_t);
1866 1952                  args->ncargs = NCARGS32;
1867 1953                  args->stk_align = STACK_ALIGN32;
1868 1954                  usrstack = (char *)USRSTACK32;
1869 1955          }
1870 1956  
     1957 +        if (args->maxstack != 0 && (uintptr_t)usrstack > args->maxstack)
     1958 +                usrstack = (char *)args->maxstack;
     1959 +
1871 1960          ASSERT(P2PHASE((uintptr_t)usrstack, args->stk_align) == 0);
1872 1961  
1873 1962  #if defined(__sparc)
1874 1963          /*
1875 1964           * Make sure user register windows are empty before
1876 1965           * attempting to make a new stack.
1877 1966           */
1878 1967          (void) flush_user_windows_to_stack(NULL);
1879 1968  #endif
1880 1969  
↓ open down ↓ 166 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX