Print this page
OS-5598 newproc() performs inadequate clean-up after failed lwp_create()
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4818 contract template disappears on exec
OS-4825 cgroup user agent should be launched from the kernel
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4460 exec brands processes that still have multiple threads
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4151 setbrand hooks should be sane during fork
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4129 lxbrand should not abuse p_brand_data for storing exit signal
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/fork.c
          +++ new/usr/src/uts/common/os/fork.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24      - * Copyright 2013, Joyent, Inc. All rights reserved.
       24 + * Copyright 2016, Joyent, Inc.
  25   25   */
  26   26  
  27   27  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  28   28  /*        All Rights Reserved   */
  29   29  
  30   30  #include <sys/types.h>
  31   31  #include <sys/param.h>
  32   32  #include <sys/sysmacros.h>
  33   33  #include <sys/signal.h>
  34   34  #include <sys/cred.h>
↓ open down ↓ 42 lines elided ↑ open up ↑
  77   77  #include <sys/sdt.h>
  78   78  #include <sys/class.h>
  79   79  #include <sys/corectl.h>
  80   80  #include <sys/brand.h>
  81   81  #include <sys/fork.h>
  82   82  
  83   83  static int64_t cfork(int, int, int);
  84   84  static int getproc(proc_t **, pid_t, uint_t);
  85   85  #define GETPROC_USER    0x0
  86   86  #define GETPROC_KERNEL  0x1
       87 +#define GETPROC_ZSCHED  0x2
  87   88  
  88   89  static void fork_fail(proc_t *);
  89   90  static void forklwp_fail(proc_t *);
  90   91  
  91   92  int fork_fail_pending;
  92   93  
  93   94  extern struct kmem_cache *process_cache;
  94   95  
  95   96  /*
  96   97   * The vfork() system call trap is no longer invoked by libc.
↓ open down ↓ 21 lines elided ↑ open up ↑
 118  119          case 1:
 119  120                  return (cfork(0, 0, flags));    /* forkallx(flags) */
 120  121          case 2:
 121  122                  curthread->t_post_sys = 1;      /* so vfwait() will be called */
 122  123                  return (cfork(1, 1, flags));    /* vforkx(flags) */
 123  124          default:
 124  125                  return ((int64_t)set_errno(EINVAL));
 125  126          }
 126  127  }
 127  128  
      129 +/*
      130 + * Unlink child cp from parent pp's orphan list and from its sibling chain.
      131 + */
      132 +static void
      133 +disown_proc(proc_t *pp, proc_t *cp)
      134 +{
      135 +        proc_t **orphpp;
      136 +
      137 +        ASSERT(MUTEX_HELD(&pidlock));   /* caller must hold pidlock */
      138 +
      139 +        orphpp = &pp->p_orphan;         /* start at head of orphan list */
      140 +        while (*orphpp != cp)           /* no NULL check: cp must be listed */
      141 +                orphpp = &(*orphpp)->p_nextorph;
      142 +        *orphpp = cp->p_nextorph;       /* bypass cp in the orphan list */
      143 +
      144 +        if (pp->p_child == cp)          /* cp heads pp's child list */
      145 +                pp->p_child = cp->p_sibling;
      146 +        if (cp->p_sibling)              /* fix neighbour's back link */
      147 +                cp->p_sibling->p_psibling = cp->p_psibling;
      148 +        if (cp->p_psibling)             /* fix neighbour's forward link */
      149 +                cp->p_psibling->p_sibling = cp->p_sibling;
      150 +}
      151 +
 128  152  /* ARGSUSED */
 129  153  static int64_t
 130  154  cfork(int isvfork, int isfork1, int flags)
 131  155  {
 132  156          proc_t *p = ttoproc(curthread);
 133  157          struct as *as;
 134      -        proc_t *cp, **orphpp;
      158 +        proc_t *cp;
 135  159          klwp_t *clone;
 136  160          kthread_t *t;
 137  161          task_t *tk;
 138  162          rval_t  r;
 139  163          int error;
 140  164          int i;
 141  165          rctl_set_t *dup_set;
 142  166          rctl_alloc_gp_t *dup_gp;
 143  167          rctl_entity_p_t e;
 144  168          lwpdir_t *ldp;
↓ open down ↓ 114 lines elided ↑ open up ↑
 259  283                  sprlock_proc(p);
 260  284                  p->p_flag |= SFORKING;
 261  285                  mutex_exit(&p->p_lock);
 262  286  
 263  287                  error = as_dup(p->p_as, cp);
 264  288                  if (error != 0) {
 265  289                          mutex_enter(&p->p_lock);
 266  290                          sprunlock(p);
 267  291                          fork_fail(cp);
 268  292                          mutex_enter(&pidlock);
 269      -                        orphpp = &p->p_orphan;
 270      -                        while (*orphpp != cp)
 271      -                                orphpp = &(*orphpp)->p_nextorph;
 272      -                        *orphpp = cp->p_nextorph;
 273      -                        if (p->p_child == cp)
 274      -                                p->p_child = cp->p_sibling;
 275      -                        if (cp->p_sibling)
 276      -                                cp->p_sibling->p_psibling = cp->p_psibling;
 277      -                        if (cp->p_psibling)
 278      -                                cp->p_psibling->p_sibling = cp->p_sibling;
      293 +                        disown_proc(p, cp);
 279  294                          mutex_enter(&cp->p_lock);
 280  295                          tk = cp->p_task;
 281  296                          task_detach(cp);
 282  297                          ASSERT(cp->p_pool->pool_ref > 0);
 283  298                          atomic_dec_32(&cp->p_pool->pool_ref);
 284  299                          mutex_exit(&cp->p_lock);
 285  300                          pid_exit(cp, tk);
 286  301                          mutex_exit(&pidlock);
 287  302                          task_rele(tk);
 288  303  
↓ open down ↓ 344 lines elided ↑ open up ↑
 633  648          /*
 634  649           * Detach failed child from task.
 635  650           */
 636  651          mutex_enter(&cp->p_lock);
 637  652          tk = cp->p_task;
 638  653          task_detach(cp);
 639  654          ASSERT(cp->p_pool->pool_ref > 0);
 640  655          atomic_dec_32(&cp->p_pool->pool_ref);
 641  656          mutex_exit(&cp->p_lock);
 642  657  
 643      -        orphpp = &p->p_orphan;
 644      -        while (*orphpp != cp)
 645      -                orphpp = &(*orphpp)->p_nextorph;
 646      -        *orphpp = cp->p_nextorph;
 647      -        if (p->p_child == cp)
 648      -                p->p_child = cp->p_sibling;
 649      -        if (cp->p_sibling)
 650      -                cp->p_sibling->p_psibling = cp->p_psibling;
 651      -        if (cp->p_psibling)
 652      -                cp->p_psibling->p_sibling = cp->p_sibling;
      658 +        disown_proc(p, cp);
 653  659          pid_exit(cp, tk);
 654  660          mutex_exit(&pidlock);
 655  661  
 656  662          task_rele(tk);
 657  663  
 658  664          mutex_enter(&p->p_lock);
 659  665          pool_barrier_exit();
 660  666          continuelwps(p);
 661  667          mutex_exit(&p->p_lock);
 662  668          error = EAGAIN;
↓ open down ↓ 26 lines elided ↑ open up ↑
 689  695          VN_RELE(PTOU(curproc)->u_cdir);
 690  696          if (PTOU(curproc)->u_rdir)
 691  697                  VN_RELE(PTOU(curproc)->u_rdir);
 692  698          if (cp->p_exec)
 693  699                  VN_RELE(cp->p_exec);
 694  700          if (cp->p_execdir)
 695  701                  VN_RELE(cp->p_execdir);
 696  702          if (PTOU(curproc)->u_cwd)
 697  703                  refstr_rele(PTOU(curproc)->u_cwd);
 698  704          if (PROC_IS_BRANDED(cp)) {
 699      -                brand_clearbrand(cp, B_TRUE);
      705 +                brand_clearbrand(cp, B_FALSE);
 700  706          }
 701  707  }
 702  708  
 703  709  /*
 704  710   * Clean up the lwps already created for this child process.
 705  711   * The fork failed while duplicating all the lwps of the parent
 706  712   * and those lwps already created must be freed.
 707  713   * This process is invisible to the rest of the system,
 708  714   * so we don't need to hold p->p_lock to protect the list.
 709  715   */
↓ open down ↓ 28 lines elided ↑ open up ↑
 738  744  
 739  745                  ASSERT(t->t_schedctl == NULL);
 740  746  
 741  747                  if (branded)
 742  748                          BROP(p)->b_freelwp(ttolwp(t));
 743  749  
 744  750                  if (t->t_door != NULL) {
 745  751                          kmem_free(t->t_door, sizeof (door_data_t));
 746  752                          t->t_door = NULL;
 747  753                  }
 748      -                lwp_ctmpl_clear(ttolwp(t));
      754 +                lwp_ctmpl_clear(ttolwp(t), B_FALSE);
 749  755  
 750  756                  /*
 751  757                   * Remove the thread from the all threads list.
 752  758                   * We need to hold pidlock for this.
 753  759                   */
 754  760                  mutex_enter(&pidlock);
 755  761                  t->t_next->t_prev = t->t_prev;
 756  762                  t->t_prev->t_next = t->t_next;
 757  763                  CL_EXIT(t);     /* tell the scheduler that we're exiting */
 758  764                  cv_broadcast(&t->t_joincv);     /* tell anyone in thread_join */
↓ open down ↓ 16 lines elided ↑ open up ↑
 775  781                  t->t_state = TS_FREE;
 776  782                  thread_rele(t);
 777  783                  thread_free(t);
 778  784          }
 779  785  }
 780  786  
 781  787  extern struct as kas;
 782  788  
 783  789  /*
 784  790   * fork a kernel process.
      791 + *
      792 + * For a non-kernel class, passing a negative pid (conventionally -1) means the
      793 + * new process is launched as a child of 'zsched' within the caller's zone.
 785  794   */
 786  795  int
 787  796  newproc(void (*pc)(), caddr_t arg, id_t cid, int pri, struct contract **ct,
 788  797      pid_t pid)
 789  798  {
 790  799          proc_t *p;
 791  800          struct user *up;
 792  801          kthread_t *t;
 793  802          cont_process_t *ctp = NULL;
 794  803          rctl_entity_p_t e;
 795  804  
 796  805          ASSERT(cid != sysdccid);
 797  806          ASSERT(cid != syscid || ct == NULL);
 798  807          if (CLASS_KERNEL(cid)) {
 799  808                  rctl_alloc_gp_t *init_gp;
 800  809                  rctl_set_t *init_set;
 801  810  
 802  811                  ASSERT(pid != 1);
      812 +                ASSERT(pid >= 0);
 803  813  
 804  814                  if (getproc(&p, pid, GETPROC_KERNEL) < 0)
 805  815                          return (EAGAIN);
 806  816  
 807  817                  /*
 808  818                   * Release the hold on the p_exec and p_execdir, these
 809  819                   * were acquired in getproc()
 810  820                   */
 811  821                  if (p->p_execdir != NULL)
 812  822                          VN_RELE(p->p_execdir);
↓ open down ↓ 23 lines elided ↑ open up ↑
 836  846                  mutex_exit(&p->p_lock);
 837  847  
 838  848                  rctl_prealloc_destroy(init_gp);
 839  849  
 840  850                  t = lwp_kernel_create(p, pc, arg, TS_STOPPED, pri);
 841  851          } else {
 842  852                  rctl_alloc_gp_t *init_gp, *default_gp;
 843  853                  rctl_set_t *init_set;
 844  854                  task_t *tk, *tk_old;
 845  855                  klwp_t *lwp;
      856 +                boolean_t pzsched = B_FALSE;
      857 +                int flag = GETPROC_USER;
 846  858  
 847      -                if (getproc(&p, pid, GETPROC_USER) < 0)
      859 +                /* Handle a new user-level process as a child of zsched. */
      860 +                if (pid < 0) {
      861 +                        VERIFY(curzone != global_zone);
      862 +                        flag = GETPROC_ZSCHED;
      863 +                        pzsched = B_TRUE;
      864 +                        pid = 0;
      865 +                }
      866 +
      867 +                if (getproc(&p, pid, flag) < 0)
 848  868                          return (EAGAIN);
 849  869                  /*
 850  870                   * init creates a new task, distinct from the task
 851  871                   * containing kernel "processes".
 852  872                   */
 853  873                  tk = task_create(0, p->p_zone);
 854  874                  mutex_enter(&tk->tk_zone->zone_nlwps_lock);
 855  875                  tk->tk_proj->kpj_ntasks++;
 856  876                  tk->tk_nprocs++;
 857  877                  mutex_exit(&tk->tk_zone->zone_nlwps_lock);
↓ open down ↓ 21 lines elided ↑ open up ↑
 879  899                  rctlproc_default_init(p, default_gp);
 880  900                  mutex_exit(&p->p_lock);
 881  901  
 882  902                  task_rele(tk_old);
 883  903                  rctl_prealloc_destroy(default_gp);
 884  904                  rctl_prealloc_destroy(init_gp);
 885  905  
 886  906                  if ((lwp = lwp_create(pc, arg, 0, p, TS_STOPPED, pri,
 887  907                      &curthread->t_hold, cid, 1)) == NULL) {
 888  908                          task_t *tk;
      909 +
 889  910                          fork_fail(p);
 890  911                          mutex_enter(&pidlock);
      912 +                        disown_proc(p->p_parent, p);
      913 +
 891  914                          mutex_enter(&p->p_lock);
 892  915                          tk = p->p_task;
 893  916                          task_detach(p);
 894  917                          ASSERT(p->p_pool->pool_ref > 0);
 895  918                          atomic_add_32(&p->p_pool->pool_ref, -1);
 896  919                          mutex_exit(&p->p_lock);
      920 +
 897  921                          pid_exit(p, tk);
 898  922                          mutex_exit(&pidlock);
 899  923                          task_rele(tk);
 900      -
 901  924                          return (EAGAIN);
 902  925                  }
 903  926                  t = lwptot(lwp);
 904  927  
 905      -                ctp = contract_process_fork(sys_process_tmpl, p, curproc,
      928 +                ctp = contract_process_fork(sys_process_tmpl, p,
      929 +                    (pzsched ? curproc->p_zone->zone_zsched : curproc),
 906  930                      B_FALSE);
 907  931                  ASSERT(ctp != NULL);
 908  932                  if (ct != NULL)
 909  933                          *ct = &ctp->conp_contract;
 910  934          }
 911  935  
 912  936          ASSERT3U(t->t_tid, ==, 1);
 913  937          p->p_lwpid = 1;
 914  938          mutex_enter(&pidlock);
 915  939          pgjoin(p, p->p_parent->p_pgidp);
↓ open down ↓ 20 lines elided ↑ open up ↑
 936  960          uid_t           ruid;
 937  961          zoneid_t        zoneid;
 938  962          task_t          *task;
 939  963          kproject_t      *proj;
 940  964          zone_t          *zone;
 941  965          int             rctlfail = 0;
 942  966  
 943  967          if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
 944  968                  return (-1);    /* no point in starting new processes */
 945  969  
 946      -        pp = (flags & GETPROC_KERNEL) ? &p0 : curproc;
      970 +        if (flags & GETPROC_ZSCHED) {
      971 +                pp = curproc->p_zone->zone_zsched;
      972 +        } else {
      973 +                pp = (flags & GETPROC_KERNEL) ? &p0 : curproc;
      974 +        }
 947  975          task = pp->p_task;
 948  976          proj = task->tk_proj;
 949  977          zone = pp->p_zone;
 950  978  
 951  979          mutex_enter(&pp->p_lock);
 952  980          mutex_enter(&zone->zone_nlwps_lock);
 953  981          if (proj != proj0p) {
 954  982                  if (task->tk_nprocs >= task->tk_nprocs_ctl)
 955  983                          if (rctl_test(rc_task_nprocs, task->tk_rctls,
 956  984                              pp, 1, 0) & RCT_DENY)
↓ open down ↓ 40 lines elided ↑ open up ↑
 997 1025          cp->p_as = &kas;
 998 1026          /*
 999 1027           * p_zone must be set before we call pid_allocate since the process
1000 1028           * will be visible after that and code such as prfind_zone will
1001 1029           * look at the p_zone field.
1002 1030           */
1003 1031          cp->p_zone = pp->p_zone;
1004 1032          cp->p_t1_lgrpid = LGRP_NONE;
1005 1033          cp->p_tr_lgrpid = LGRP_NONE;
1006 1034  
     1035 +        /* Default to native brand initially */
     1036 +        cp->p_brand = &native_brand;
     1037 +
1007 1038          if ((newpid = pid_allocate(cp, pid, PID_ALLOC_PROC)) == -1) {
1008 1039                  if (nproc == v.v_proc) {
1009 1040                          CPU_STATS_ADDQ(CPU, sys, procovf, 1);
1010 1041                          cmn_err(CE_WARN, "out of processes");
1011 1042                  }
1012 1043                  goto bad;
1013 1044          }
1014 1045  
1015 1046          mutex_enter(&pp->p_lock);
1016 1047          cp->p_exec = pp->p_exec;
↓ open down ↓ 47 lines elided ↑ open up ↑
1064 1095  
1065 1096          cp->p_next = practive;
1066 1097          practive->p_prev = cp;
1067 1098          practive = cp;
1068 1099  
1069 1100          cp->p_ignore = pp->p_ignore;
1070 1101          cp->p_siginfo = pp->p_siginfo;
1071 1102          cp->p_flag = pp->p_flag & (SJCTL|SNOWAIT|SNOCD);
1072 1103          cp->p_sessp = pp->p_sessp;
1073 1104          sess_hold(pp);
1074      -        cp->p_brand = pp->p_brand;
1075      -        if (PROC_IS_BRANDED(pp))
1076      -                BROP(pp)->b_copy_procdata(cp, pp);
1077 1105          cp->p_bssbase = pp->p_bssbase;
1078 1106          cp->p_brkbase = pp->p_brkbase;
1079 1107          cp->p_brksize = pp->p_brksize;
1080 1108          cp->p_brkpageszc = pp->p_brkpageszc;
1081 1109          cp->p_stksize = pp->p_stksize;
1082 1110          cp->p_stkpageszc = pp->p_stkpageszc;
1083 1111          cp->p_stkprot = pp->p_stkprot;
1084 1112          cp->p_datprot = pp->p_datprot;
1085 1113          cp->p_usrstack = pp->p_usrstack;
1086 1114          cp->p_model = pp->p_model;
↓ open down ↓ 59 lines elided ↑ open up ↑
1146 1174           * are always attached to task0.
1147 1175           */
1148 1176          mutex_enter(&cp->p_lock);
1149 1177          if (flags & GETPROC_KERNEL)
1150 1178                  task_attach(task0p, cp);
1151 1179          else
1152 1180                  task_attach(pp->p_task, cp);
1153 1181          mutex_exit(&cp->p_lock);
1154 1182          mutex_exit(&pidlock);
1155 1183  
     1184 +        if (PROC_IS_BRANDED(pp)) {
     1185 +                /*
     1186 +                 * The only reason why process branding should fail is when
     1187 +                 * the procedure is complicated by multiple LWPs on the scene.
     1188 +                 * With an LWP count of 0, this newly allocated process has no
     1189 +                 * reason to fail branding.
     1190 +                 */
     1191 +                VERIFY0(brand_setbrand(cp, B_FALSE));
     1192 +
     1193 +                BROP(pp)->b_copy_procdata(cp, pp);
     1194 +        }
     1195 +
1156 1196          avl_create(&cp->p_ct_held, contract_compar, sizeof (contract_t),
1157 1197              offsetof(contract_t, ct_ctlist));
1158 1198  
1159 1199          /*
1160 1200           * Duplicate any audit information kept in the process table
1161 1201           */
1162 1202          if (audit_active)       /* copy audit data to cp */
1163 1203                  audit_newproc(cp);
1164 1204  
1165 1205          crhold(cp->p_cred = cr);
↓ open down ↓ 297 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX